[compiler-rt] [sanitizer][asan][msvc] Teach GetInstructionSize about many instructions that appear in MSVC generated code. (PR #69490)

Charlie Barto via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 30 14:01:13 PDT 2023


https://github.com/barcharcraz updated https://github.com/llvm/llvm-project/pull/69490

>From ba4c1a934797896b12f151900304538952826fc3 Mon Sep 17 00:00:00 2001
From: Charlie Barto <Charles.Barto at microsoft.com>
Date: Fri, 19 May 2023 15:19:03 -0700
Subject: [PATCH 1/5] Teach GetInstructionSize about many instructions that
 appear in MSVC generated code.

---
 .../lib/interception/interception_win.cpp     | 160 ++++++++++++++----
 1 file changed, 130 insertions(+), 30 deletions(-)

diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index 1b681ada37b170d..7c5bad1a1d72bf5 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -494,6 +494,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x6A:  // 6A XX = push XX
       return 2;
 
+    // This instruction can be encoded with a 16-bit immediate but that is
+    // incredibly unlikely.
+    case 0x68:  // 68 XX XX XX XX : push imm32
+      return 5;
+
     case 0xb8:  // b8 XX XX XX XX : mov eax, XX XX XX XX
     case 0xB9:  // b9 XX XX XX XX : mov ecx, XX XX XX XX
       return 5;
@@ -530,8 +535,13 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xE589:  // 89 E5 : mov ebp, esp
     case 0xC18B:  // 8B C1 : mov eax, ecx
     case 0xC033:  // 33 C0 : xor eax, eax
+    case 0x8bec:  // EC 8B : mov ebp, esp
     case 0xC933:  // 33 C9 : xor ecx, ecx
     case 0xD233:  // 33 D2 : xor edx, edx
+    case 0xc084:  // 84 c0 : test al,al
+    case 0xdb84:  // 84 db : test bl,bl
+    case 0xc984:  // 84 c9 : test cl,cl
+    case 0xd284:  // 84 d2 : test dl,dl
       return 2;
 
     // Cannot overwrite control-instruction. Return 0 to indicate failure.
@@ -540,6 +550,9 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
   }
 
   switch (0x00FFFFFF & *(u32*)address) {
+    case 0x83e4f8:  // F8 E4 83 : and esp, 0xFFFFFFF8
+    case 0x83ec64:  // 64 EC 83 : sub esp, 64h
+      return 3;
     case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
       return 7;
   }
@@ -549,6 +562,21 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
                 //   movabs eax, dword ptr ds:[XXXXXXXX]
       return 9;
+    case 0xf2:
+      switch (*(u32 *)(address + 1)) {
+        case 0x2444110f:  // f2 0f 11 44 24 XX    movsd   mmword ptr [rsp + XX],
+                          // xmm0
+        case 0x244c110f:  //  f2 0f 11 4c 24 XX       movsd  QWORD PTR
+                          //  [rsp+0x8],xmm1
+        case 0x2454110f:  //  f2 0f 11 54 24 XX       movsd  QWORD PTR
+                          //  [rsp+0x8],xmm2
+        case 0x245c110f:  //  f2 0f 11 5c 24 XX       movsd  QWORD PTR
+                          //  [rsp+0x8],xmm3
+        case 0x2464110f:  //  f2 0f 11 64 24 XX       movsd  QWORD PTR
+                          //  [rsp+0x8],xmm4
+          return 6;
+      }
+      break;
 
     case 0x83:
       const u8 next_byte = *(u8*)(address + 1);
@@ -577,48 +605,116 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x018a:  // mov al, byte ptr [rcx]
       return 2;
 
+    case 0x7e80:  // 80 7e YY XX  cmp BYTE PTR [rsi+YY], XX
+    case 0x7d80:  // 80 7d YY XX  cmp BYTE PTR [rdx+YY], XX
+    case 0x7a80:  // 80 7a YY XX  cmp BYTE PTR [rdx+YY], XX
+    case 0x7880:  // 80 78 YY XX  cmp BYTE PTR [rax+YY], XX
+    case 0x7b80:  // 80 7b YY XX  cmp BYTE PTR [rbx+YY], XX
+    case 0x7980:  // 80 79 YY XX  cmp BYTE ptr [rcx+YY], XX
+      return 4;
+
     case 0x058B:  // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
       if (rel_offset)
         *rel_offset = 2;
       return 6;
+
+    case 0x7e81:  // 81 7e YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
+    case 0x7d81:  // 81 7d YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
+    case 0x7a81:  // 81 7a YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
+    case 0x7881:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rax+YY], XX XX XX XX
+    case 0x7b81:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
+    case 0x7981:  // 81 79 YY XX XX XX XX  cmp dword ptr [rcx+YY], XX XX XX XX
+      return 7;
   }
 
   switch (0x00FFFFFF & *(u32*)address) {
-    case 0xe58948:    // 48 8b c4 : mov rbp, rsp
-    case 0xc18b48:    // 48 8b c1 : mov rax, rcx
-    case 0xc48b48:    // 48 8b c4 : mov rax, rsp
-    case 0xd9f748:    // 48 f7 d9 : neg rcx
-    case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
-    case 0x07c1f6:    // f6 c1 07 : test cl, 0x7
-    case 0xc98548:    // 48 85 C9 : test rcx, rcx
-    case 0xd28548:    // 48 85 d2 : test rdx, rdx
-    case 0xc0854d:    // 4d 85 c0 : test r8, r8
-    case 0xc2b60f:    // 0f b6 c2 : movzx eax, dl
-    case 0xc03345:    // 45 33 c0 : xor r8d, r8d
-    case 0xc93345:    // 45 33 c9 : xor r9d, r9d
-    case 0xdb3345:    // 45 33 DB : xor r11d, r11d
-    case 0xd98b4c:    // 4c 8b d9 : mov r11, rcx
-    case 0xd28b4c:    // 4c 8b d2 : mov r10, rdx
-    case 0xc98b4c:    // 4C 8B C9 : mov r9, rcx
-    case 0xc18b4c:    // 4C 8B C1 : mov r8, rcx
-    case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
-    case 0xca2b48:    // 48 2b ca : sub rcx, rdx
-    case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
-    case 0xc00b4d:    // 3d 0b c0 : or r8, r8
-    case 0xc08b41:    // 41 8b c0 : mov eax, r8d
-    case 0xd18b48:    // 48 8b d1 : mov rdx, rcx
-    case 0xdc8b4c:    // 4c 8b dc : mov r11, rsp
-    case 0xd18b4c:    // 4c 8b d1 : mov r10, rcx
-    case 0xE0E483:    // 83 E4 E0 : and esp, 0xFFFFFFE0
+    case 0x07c1f6:  // f6 c1 07 : test cl, 0x7
+    case 0x10b70f:  // 0f b7 10 : movzx edx, word ptr [rax]
+    case 0xc00b4d:  // 3d 0b c0 : or r8, r8
+    case 0xc03345:  // 45 33 c0 : xor r8d, r8d
+    case 0xc08548:  // 48 85 c0 : test rax, rax
+    case 0xc0854d:  // 4d 85 c0 : test r8, r8
+    case 0xc08b41:  // 41 8b c0 : mov eax, r8d
+    case 0xc0ff48:  // 48 ff c0 : inc rax
+    case 0xc0ff49:  // 49 ff c0 : inc r8
+    case 0xc18b41:  // 41 8b c1 : mov eax, r9d
+    case 0xc18b48:  // 48 8b c1 : mov rax, rcx
+    case 0xc18b4c:  // 4c 8b c1 : mov r8, rcx
+    case 0xc1ff48:  // 48 ff c1 : inc rcx
+    case 0xc1ff49:  // 49 ff c1 : inc r9
+    case 0xc28b41:  // 41 8b c2 : mov eax, r10d
+    case 0xc2b60f:  // 0f b6 c2 : movzx eax, dl
+    case 0xc2ff48:  // 48 ff c2 : inc rdx
+    case 0xc2ff49:  // 49 ff c2 : inc r10
+    case 0xc38b41:  // 41 8b c3 : mov eax, r11d
+    case 0xc3ff48:  // 48 ff c3 : inc rbx
+    case 0xc3ff49:  // 49 ff c3 : inc r11
+    case 0xc48b41:  // 41 8b c4 : mov eax, r12d
+    case 0xc48b48:  // 48 8b c4 : mov rax, rsp
+    case 0xc4ff49:  // 49 ff c4 : inc r12
+    case 0xc5ff49:  // 49 ff c5 : inc r13
+    case 0xc6ff48:  // 48 ff c6 : inc rsi
+    case 0xc6ff49:  // 49 ff c6 : inc r14
+    case 0xc7ff48:  // 48 ff c7 : inc rdi
+    case 0xc7ff49:  // 49 ff c7 : inc r15
+    case 0xc93345:  // 45 33 c9 : xor r9d, r9d
+    case 0xc98548:  // 48 85 c9 : test rcx, rcx
+    case 0xc9854d:  // 4d 85 c9 : test r9, r9
+    case 0xc98b4c:  // 4c 8b c9 : mov r9, rcx
+    case 0xca2b48:  // 48 2b ca : sub rcx, rdx
+    case 0xd12b48:  // 48 2b d1 : sub rdx, rcx
+    case 0xd18b48:  // 48 8b d1 : mov rdx, rcx
+    case 0xd18b4c:  // 4c 8b d1 : mov r10, rcx
+    case 0xd28548:  // 48 85 d2 : test rdx, rdx
+    case 0xd2854d:  // 4d 85 d2 : test r10, r10
+    case 0xd28b4c:  // 4c 8b d2 : mov r10, rdx
+    case 0xd2b60f:  // 0f b6 d2 : movzx edx, dl
+    case 0xd98b4c:  // 4c 8b d9 : mov r11, rcx
+    case 0xd9f748:  // 48 f7 d9 : neg rcx
+    case 0xdb3345:  // 45 33 db : xor r11d, r11d
+    case 0xdb8548:  // 48 85 db : test rbx, rbx
+    case 0xdb854d:  // 4d 85 db : test r11, r11
+    case 0xdc8b4c:  // 4c 8b dc : mov r11, rsp
+    case 0xe0e483:  // 83 e4 e0 : and esp, 0xffffffe0
+    case 0xe48548:  // 48 85 e4 : test rsp, rsp
+    case 0xe4854d:  // 4d 85 e4 : test r12, r12
+    case 0xe58948:  // 48 8b c4 : mov rbp, rsp
+    case 0xed8548:  // 48 85 ed : test rbp, rbp
+    case 0xed854d:  // 4d 85 ed : test r13, r13
+    case 0xf6854d:  // 4d 85 f6 : test r14, r14
+    case 0xff854d:  // 4d 85 ff : test r15, r15
       return 3;
 
-    case 0xec8348:    // 48 83 ec XX : sub rsp, XX
-    case 0xf88349:    // 49 83 f8 XX : cmp r8, XX
-    case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
+    case 0x245489:  // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx
+    case 0x428d44:  // 44 8d 42 XX : lea r8d , [rdx + XX]
+    case 0x588948:  // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
+    case 0xec8348:  // 48 83 ec XX : sub rsp, XX
+    case 0xf88349:  // 49 83 f8 XX : cmp r8, XX
       return 4;
 
+    case 0x246483:  // 83 64 24 00 00 :   and    DWORD PTR [rsp+xx],0x0
+      return 5;
+
+    case 0x788166:  // 66 81 78 YY XX XX  cmp WORD PTR [rax+0xYY], XX XX
+    case 0x798166:  // 66 81 79 YY XX XX  cmp WORD PTR [rcx+0xYY], XX XX
+    case 0x7a8166:  // 66 81 7a YY XX XX  cmp WORD PTR [rdx+0xYY], XX XX
+    case 0x7b8166:  // 66 81 7b YY XX XX  cmp WORD PTR [rbx+0xYY], XX XX
+    case 0x7e8166:  // 66 81 7e YY XX XX  cmp WORD PTR [rsi+0xYY], XX XX
+    case 0x7f8166:  // 66 81 7f YY XX XX  cmp WORD PTR [rdi+0xYY], XX XX
+      return 6;
+
     case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
       return 7;
+    case 0x788141: // 41 81 78 YY XX XX XX XX cmp DWORD PTR [r8+YY], XX XX XX XX
+    case 0x798141: // r9
+    case 0x7a8141: //r10
+    case 0x7b8141: //r11
+    case 0x7c8141: //r12
+    case 0x7d8141: //r13
+    case 0x7e8141: //r14
+    case 0x7f8141: // 41 81 78 YY XX XX XX XX cmp DWORD P [r15+YY], XX XX XX XX
+    case 0x247c81: // 81 7c 24 YY XX XX XX XX cmp DWORD P [rsp+YY], XX XX XX XX
+      return 8;
 
     case 0x058b48:    // 48 8b 05 XX XX XX XX :
                       //   mov rax, QWORD PTR [rip + XXXXXXXX]
@@ -645,8 +741,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x24548948:  // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx
     case 0x244c894c:  // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9
     case 0x2444894c:  // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8
+    case 0x244c8944:  // 44 89 4c 24 XX   mov DWORD PTR [rsp + XX], r9d
+    case 0x24448944:  // 44 89 44 24 XX   mov DWORD PTR [rsp + XX], r8d
+    case 0x246c8d48:  // 48 8d 6c 24 XX : lea rbp, [rsp + XX]
       return 5;
-    case 0x24648348:  // 48 83 64 24 XX : and QWORD PTR [rsp + XX], YY
+    case 0x24648348:  // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY
       return 6;
   }
 
@@ -660,6 +759,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x458B:  // 8B 45 XX : mov eax, dword ptr [ebp + XX]
     case 0x5D8B:  // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
     case 0x7D8B:  // 8B 7D XX : mov edi, dword ptr [ebp + XX]
+    case 0x758B:  // 8B 75 XX : mov esi, dword ptr [ebp + XX]
     case 0xEC83:  // 83 EC XX : sub esp, XX
     case 0x75FF:  // FF 75 XX : push dword ptr [ebp + XX]
       return 3;

>From b5b73ce1cf1803f0aa2b7faac5d8d4fb56ec50c8 Mon Sep 17 00:00:00 2001
From: Charlie Barto <Charles.Barto at microsoft.com>
Date: Fri, 19 May 2023 17:05:55 -0700
Subject: [PATCH 2/5] [sanitizer][asan][msvc] Teach GetInstructionSize about
 many instructions that appear in MSVC generated code.

MSVC can sometimes generate instructions in function prologues that ASan previously didn't know the size of; this teaches ASan those sizes. This isn't hilariously useful for using ASan with non-MSVC compilers, but it does stand alone.

Differential Revision: https://reviews.llvm.org/D151008
---
 .../lib/interception/interception_win.cpp     | 41 ++++++++++---------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index 7c5bad1a1d72bf5..10ac87ac729f605 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -564,17 +564,17 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
       return 9;
     case 0xf2:
       switch (*(u32 *)(address + 1)) {
-        case 0x2444110f:  // f2 0f 11 44 24 XX    movsd   mmword ptr [rsp + XX],
-                          // xmm0
-        case 0x244c110f:  //  f2 0f 11 4c 24 XX       movsd  QWORD PTR
-                          //  [rsp+0x8],xmm1
-        case 0x2454110f:  //  f2 0f 11 54 24 XX       movsd  QWORD PTR
-                          //  [rsp+0x8],xmm2
-        case 0x245c110f:  //  f2 0f 11 5c 24 XX       movsd  QWORD PTR
-                          //  [rsp+0x8],xmm3
-        case 0x2464110f:  //  f2 0f 11 64 24 XX       movsd  QWORD PTR
-                          //  [rsp+0x8],xmm4
-          return 6;
+          case 0x2444110f:  // f2 0f 11 44 24 XX    movsd   mmword ptr [rsp +
+                            // XX], xmm0
+          case 0x244c110f:  //  f2 0f 11 4c 24 XX       movsd  QWORD PTR
+                            //  [rsp+0x8],xmm1
+          case 0x2454110f:  //  f2 0f 11 54 24 XX       movsd  QWORD PTR
+                            //  [rsp+0x8],xmm2
+          case 0x245c110f:  //  f2 0f 11 5c 24 XX       movsd  QWORD PTR
+                            //  [rsp+0x8],xmm3
+          case 0x2464110f:  //  f2 0f 11 64 24 XX       movsd  QWORD PTR
+                            //  [rsp+0x8],xmm4
+            return 6;
       }
       break;
 
@@ -705,15 +705,16 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
 
     case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
       return 7;
-    case 0x788141: // 41 81 78 YY XX XX XX XX cmp DWORD PTR [r8+YY], XX XX XX XX
-    case 0x798141: // r9
-    case 0x7a8141: //r10
-    case 0x7b8141: //r11
-    case 0x7c8141: //r12
-    case 0x7d8141: //r13
-    case 0x7e8141: //r14
-    case 0x7f8141: // 41 81 78 YY XX XX XX XX cmp DWORD P [r15+YY], XX XX XX XX
-    case 0x247c81: // 81 7c 24 YY XX XX XX XX cmp DWORD P [rsp+YY], XX XX XX XX
+    case 0x788141:  // 41 81 78 YY XX XX XX XX cmp DWORD PTR [r8+YY], XX XX XX
+                    // XX
+    case 0x798141:  // r9
+    case 0x7a8141:  // r10
+    case 0x7b8141:  // r11
+    case 0x7c8141:  // r12
+    case 0x7d8141:  // r13
+    case 0x7e8141:  // r14
+    case 0x7f8141:  // 41 81 78 YY XX XX XX XX cmp DWORD P [r15+YY], XX XX XX XX
+    case 0x247c81:  // 81 7c 24 YY XX XX XX XX cmp DWORD P [rsp+YY], XX XX XX XX
       return 8;
 
     case 0x058b48:    // 48 8b 05 XX XX XX XX :

>From 5f900ffea66de3d906038154d6350baa27f94c79 Mon Sep 17 00:00:00 2001
From: Charlie Barto <Charles.Barto at microsoft.com>
Date: Wed, 31 May 2023 13:08:09 -0700
Subject: [PATCH 3/5] correct instruction encodings

---
 .../lib/interception/interception_win.cpp     | 63 +++++++++----------
 1 file changed, 30 insertions(+), 33 deletions(-)

diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index 10ac87ac729f605..c02291beddb771d 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -535,13 +535,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xE589:  // 89 E5 : mov ebp, esp
     case 0xC18B:  // 8B C1 : mov eax, ecx
     case 0xC033:  // 33 C0 : xor eax, eax
-    case 0x8bec:  // EC 8B : mov ebp, esp
     case 0xC933:  // 33 C9 : xor ecx, ecx
     case 0xD233:  // 33 D2 : xor edx, edx
-    case 0xc084:  // 84 c0 : test al,al
-    case 0xdb84:  // 84 db : test bl,bl
-    case 0xc984:  // 84 c9 : test cl,cl
-    case 0xd284:  // 84 d2 : test dl,dl
+    case 0xDB84:  // 84 DB : test bl,bl
+    case 0xC984:  // 84 C9 : test cl,cl
+    case 0xD284:  // 84 D2 : test dl,dl
       return 2;
 
     // Cannot overwrite control-instruction. Return 0 to indicate failure.
@@ -550,8 +548,8 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
   }
 
   switch (0x00FFFFFF & *(u32*)address) {
-    case 0x83e4f8:  // F8 E4 83 : and esp, 0xFFFFFFF8
-    case 0x83ec64:  // 64 EC 83 : sub esp, 64h
+    case 0xF8E484:  // 83 E4 F8 : and esp, 0xFFFFFFF8
+    case 0x64EC83:  // 83 EC 64 : sub esp, 64h
       return 3;
     case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
       return 7;
@@ -562,7 +560,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
                 //   movabs eax, dword ptr ds:[XXXXXXXX]
       return 9;
-    case 0xf2:
+    case 0xF2:
       switch (*(u32 *)(address + 1)) {
           case 0x2444110f:  // f2 0f 11 44 24 XX    movsd   mmword ptr [rsp +
                             // XX], xmm0
@@ -601,15 +599,15 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x5641:  // push r14
     case 0x5741:  // push r15
     case 0x9066:  // Two-byte NOP
-    case 0xc084:  // test al, al
-    case 0x018a:  // mov al, byte ptr [rcx]
+    case 0xC084:  // test al, al
+    case 0x018A:  // mov al, byte ptr [rcx]
       return 2;
 
-    case 0x7e80:  // 80 7e YY XX  cmp BYTE PTR [rsi+YY], XX
-    case 0x7d80:  // 80 7d YY XX  cmp BYTE PTR [rdx+YY], XX
-    case 0x7a80:  // 80 7a YY XX  cmp BYTE PTR [rdx+YY], XX
+    case 0x7E80:  // 80 7E YY XX  cmp BYTE PTR [rsi+YY], XX
+    case 0x7D80:  // 80 7D YY XX  cmp BYTE PTR [rdx+YY], XX
+    case 0x7A80:  // 80 7A YY XX  cmp BYTE PTR [rdx+YY], XX
     case 0x7880:  // 80 78 YY XX  cmp BYTE PTR [rax+YY], XX
-    case 0x7b80:  // 80 7b YY XX  cmp BYTE PTR [rbx+YY], XX
+    case 0x7B80:  // 80 7B YY XX  cmp BYTE PTR [rbx+YY], XX
     case 0x7980:  // 80 79 YY XX  cmp BYTE ptr [rcx+YY], XX
       return 4;
 
@@ -618,11 +616,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
         *rel_offset = 2;
       return 6;
 
-    case 0x7e81:  // 81 7e YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
-    case 0x7d81:  // 81 7d YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
-    case 0x7a81:  // 81 7a YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
+    case 0x7E81:  // 81 7E YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
+    case 0x7D81:  // 81 7D YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
+    case 0x7A81:  // 81 7A YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
     case 0x7881:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rax+YY], XX XX XX XX
-    case 0x7b81:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
+    case 0x7B81:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
     case 0x7981:  // 81 79 YY XX XX XX XX  cmp dword ptr [rcx+YY], XX XX XX XX
       return 7;
   }
@@ -692,27 +690,26 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xf88349:  // 49 83 f8 XX : cmp r8, XX
       return 4;
 
-    case 0x246483:  // 83 64 24 00 00 :   and    DWORD PTR [rsp+xx],0x0
+    case 0x246483:  // 83 64 24 XX YY :   and    DWORD PTR [rsp+XX], YY
       return 5;
 
-    case 0x788166:  // 66 81 78 YY XX XX  cmp WORD PTR [rax+0xYY], XX XX
-    case 0x798166:  // 66 81 79 YY XX XX  cmp WORD PTR [rcx+0xYY], XX XX
-    case 0x7a8166:  // 66 81 7a YY XX XX  cmp WORD PTR [rdx+0xYY], XX XX
-    case 0x7b8166:  // 66 81 7b YY XX XX  cmp WORD PTR [rbx+0xYY], XX XX
-    case 0x7e8166:  // 66 81 7e YY XX XX  cmp WORD PTR [rsi+0xYY], XX XX
-    case 0x7f8166:  // 66 81 7f YY XX XX  cmp WORD PTR [rdi+0xYY], XX XX
+    case 0x788166:  // 66 81 78 XX YY YY  cmp WORD PTR [rax+XX], YY YY
+    case 0x798166:  // 66 81 79 XX YY YY  cmp WORD PTR [rcx+XX], YY YY
+    case 0x7a8166:  // 66 81 7a XX YY YY  cmp WORD PTR [rdx+XX], YY YY
+    case 0x7b8166:  // 66 81 7b XX YY YY  cmp WORD PTR [rbx+XX], YY YY
+    case 0x7e8166:  // 66 81 7e XX YY YY  cmp WORD PTR [rsi+XX], YY YY
+    case 0x7f8166:  // 66 81 7f XX YY YY  cmp WORD PTR [rdi+XX], YY YY
       return 6;
 
     case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
       return 7;
-    case 0x788141:  // 41 81 78 YY XX XX XX XX cmp DWORD PTR [r8+YY], XX XX XX
-                    // XX
-    case 0x798141:  // r9
-    case 0x7a8141:  // r10
-    case 0x7b8141:  // r11
-    case 0x7c8141:  // r12
-    case 0x7d8141:  // r13
-    case 0x7e8141:  // r14
+    case 0x788141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX
+    case 0x798141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX
+    case 0x7a8141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX
+    case 0x7b8141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r11+YY], XX XX XX XX
+    case 0x7c8141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r12+YY], XX XX XX XX
+    case 0x7d8141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r13+YY], XX XX XX XX
+    case 0x7e8141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r14+YY], XX XX XX XX
     case 0x7f8141:  // 41 81 78 YY XX XX XX XX cmp DWORD P [r15+YY], XX XX XX XX
     case 0x247c81:  // 81 7c 24 YY XX XX XX XX cmp DWORD P [rsp+YY], XX XX XX XX
       return 8;

>From edfffaa1471ae99d1f6a13e436d6d46930bd3685 Mon Sep 17 00:00:00 2001
From: Charlie Barto <Charles.Barto at microsoft.com>
Date: Wed, 31 May 2023 13:10:51 -0700
Subject: [PATCH 4/5] prevent clang-format from wrapping some lines

---
 compiler-rt/lib/interception/interception_win.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index c02291beddb771d..3052c8f97177827 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -703,6 +703,8 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
 
     case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
       return 7;
+
+      // clang-format off
     case 0x788141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX
     case 0x798141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX
     case 0x7a8141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX
@@ -713,6 +715,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x7f8141:  // 41 81 78 YY XX XX XX XX cmp DWORD P [r15+YY], XX XX XX XX
     case 0x247c81:  // 81 7c 24 YY XX XX XX XX cmp DWORD P [rsp+YY], XX XX XX XX
       return 8;
+      // clang-format on
 
     case 0x058b48:    // 48 8b 05 XX XX XX XX :
                       //   mov rax, QWORD PTR [rip + XXXXXXXX]

>From 39b422d9075bc54778800d58b494cc78fe644436 Mon Sep 17 00:00:00 2001
From: Charlie Barto <Charles.Barto at microsoft.com>
Date: Wed, 31 May 2023 13:20:32 -0700
Subject: [PATCH 5/5] correct a few more comments

---
 compiler-rt/lib/interception/interception_win.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index 3052c8f97177827..6f1e36adf1a8047 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -604,7 +604,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
       return 2;
 
     case 0x7E80:  // 80 7E YY XX  cmp BYTE PTR [rsi+YY], XX
-    case 0x7D80:  // 80 7D YY XX  cmp BYTE PTR [rdx+YY], XX
+    case 0x7D80:  // 80 7D YY XX  cmp BYTE PTR [rbp+YY], XX
     case 0x7A80:  // 80 7A YY XX  cmp BYTE PTR [rdx+YY], XX
     case 0x7880:  // 80 78 YY XX  cmp BYTE PTR [rax+YY], XX
     case 0x7B80:  // 80 7B YY XX  cmp BYTE PTR [rbx+YY], XX
@@ -617,10 +617,10 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
       return 6;
 
     case 0x7E81:  // 81 7E YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
-    case 0x7D81:  // 81 7D YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
+    case 0x7D81:  // 81 7D YY XX XX XX XX  cmp DWORD PTR [rbp+YY], XX XX XX XX
     case 0x7A81:  // 81 7A YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
     case 0x7881:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rax+YY], XX XX XX XX
-    case 0x7B81:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
+    case 0x7B81:  // 81 7B YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
     case 0x7981:  // 81 79 YY XX XX XX XX  cmp dword ptr [rcx+YY], XX XX XX XX
       return 7;
   }
@@ -628,7 +628,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
   switch (0x00FFFFFF & *(u32*)address) {
     case 0x07c1f6:  // f6 c1 07 : test cl, 0x7
     case 0x10b70f:  // 0f b7 10 : movzx edx, word ptr [rax]
-    case 0xc00b4d:  // 3d 0b c0 : or r8, r8
+    case 0xc00b4d:  // 4d 0b c0 : or r8, r8
     case 0xc03345:  // 45 33 c0 : xor r8d, r8d
     case 0xc08548:  // 48 85 c0 : test rax, rax
     case 0xc0854d:  // 4d 85 c0 : test r8, r8
@@ -676,7 +676,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xe0e483:  // 83 e4 e0 : and esp, 0xffffffe0
     case 0xe48548:  // 48 85 e4 : test rsp, rsp
     case 0xe4854d:  // 4d 85 e4 : test r12, r12
-    case 0xe58948:  // 48 8b c4 : mov rbp, rsp
+    case 0xe58948:  // 48 89 c4 : mov rbp, rsp
     case 0xed8548:  // 48 85 ed : test rbp, rbp
     case 0xed854d:  // 4d 85 ed : test r13, r13
     case 0xf6854d:  // 4d 85 f6 : test r14, r14



More information about the llvm-commits mailing list