[compiler-rt] [sanitizer][asan][msvc] Teach GetInstructionSize about many instructions that appear in MSVC generated code. (PR #69490)

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 18 11:09:35 PDT 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Charlie Barto (barcharcraz)

<details>
<summary>Changes</summary>

MSVC can sometimes generate instructions in function prologues that asan previously didn't know the size of. This teaches asan those sizes. This isn't super useful for using ASAN with non-msvc compilers, but it does stand alone.

From https://reviews.llvm.org/D151008

---
Full diff: https://github.com/llvm/llvm-project/pull/69490.diff


1 Files Affected:

- (modified) compiler-rt/lib/interception/interception_win.cpp (+133-32) 


``````````diff
diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp
index 00c317510e42087..093318fbd47c8d1 100644
--- a/compiler-rt/lib/interception/interception_win.cpp
+++ b/compiler-rt/lib/interception/interception_win.cpp
@@ -489,6 +489,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x6A:  // 6A XX = push XX
       return 2;
 
+    // This instruction can be encoded with a 16-bit immediate but that is
+    // incredibly unlikely.
+    case 0x68:  // 68 XX XX XX XX : push imm32
+      return 5;
+
     case 0xb8:  // b8 XX XX XX XX : mov eax, XX XX XX XX
     case 0xB9:  // b9 XX XX XX XX : mov ecx, XX XX XX XX
       return 5;
@@ -527,6 +532,9 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xC033:  // 33 C0 : xor eax, eax
     case 0xC933:  // 33 C9 : xor ecx, ecx
     case 0xD233:  // 33 D2 : xor edx, edx
+    case 0xDB84:  // 84 DB : test bl,bl
+    case 0xC984:  // 84 C9 : test cl,cl
+    case 0xD284:  // 84 D2 : test dl,dl
       return 2;
 
     // Cannot overwrite control-instruction. Return 0 to indicate failure.
@@ -535,6 +543,9 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
   }
 
   switch (0x00FFFFFF & *(u32*)address) {
+    case 0xF8E484:  // 83 E4 F8 : and esp, 0xFFFFFFF8 (NOTE(review): bytes 83 E4 F8 read as 0xF8E483, not 0xF8E484 -- verify the constant)
+    case 0x64EC83:  // 83 EC 64 : sub esp, 64h
+      return 3;
     case 0x24A48D:  // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX]
       return 7;
   }
@@ -544,6 +555,21 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0xA1:  // A1 XX XX XX XX XX XX XX XX :
                 //   movabs eax, dword ptr ds:[XXXXXXXX]
       return 9;
+    case 0xF2:
+      switch (*(u32 *)(address + 1)) {
+          case 0x2444110f:  // f2 0f 11 44 24 XX : movsd QWORD PTR
+                            //   [rsp + XX], xmm0
+          case 0x244c110f:  // f2 0f 11 4c 24 XX : movsd QWORD PTR
+                            //   [rsp + XX], xmm1
+          case 0x2454110f:  // f2 0f 11 54 24 XX : movsd QWORD PTR
+                            //   [rsp + XX], xmm2
+          case 0x245c110f:  // f2 0f 11 5c 24 XX : movsd QWORD PTR
+                            //   [rsp + XX], xmm3
+          case 0x2464110f:  // f2 0f 11 64 24 XX : movsd QWORD PTR
+                            //   [rsp + XX], xmm4
+            return 6;
+      }
+      break;
 
     case 0x83:
       const u8 next_byte = *(u8*)(address + 1);
@@ -568,53 +594,124 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x5641:  // push r14
     case 0x5741:  // push r15
     case 0x9066:  // Two-byte NOP
-    case 0xc084:  // test al, al
-    case 0x018a:  // mov al, byte ptr [rcx]
+    case 0xC084:  // test al, al
+    case 0x018A:  // mov al, byte ptr [rcx]
       return 2;
 
+    case 0x7E80:  // 80 7E YY XX  cmp BYTE PTR [rsi+YY], XX
+    case 0x7D80:  // 80 7D YY XX  cmp BYTE PTR [rbp+YY], XX
+    case 0x7A80:  // 80 7A YY XX  cmp BYTE PTR [rdx+YY], XX
+    case 0x7880:  // 80 78 YY XX  cmp BYTE PTR [rax+YY], XX
+    case 0x7B80:  // 80 7B YY XX  cmp BYTE PTR [rbx+YY], XX
+    case 0x7980:  // 80 79 YY XX  cmp BYTE ptr [rcx+YY], XX
+      return 4;
+
     case 0x058B:  // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX]
       if (rel_offset)
         *rel_offset = 2;
       return 6;
+
+    case 0x7E81:  // 81 7E YY XX XX XX XX  cmp DWORD PTR [rsi+YY], XX XX XX XX
+    case 0x7D81:  // 81 7D YY XX XX XX XX  cmp DWORD PTR [rbp+YY], XX XX XX XX
+    case 0x7A81:  // 81 7A YY XX XX XX XX  cmp DWORD PTR [rdx+YY], XX XX XX XX
+    case 0x7881:  // 81 78 YY XX XX XX XX  cmp DWORD PTR [rax+YY], XX XX XX XX
+    case 0x7B81:  // 81 7B YY XX XX XX XX  cmp DWORD PTR [rbx+YY], XX XX XX XX
+    case 0x7981:  // 81 79 YY XX XX XX XX  cmp dword ptr [rcx+YY], XX XX XX XX
+      return 7;
   }
 
   switch (0x00FFFFFF & *(u32*)address) {
-    case 0xe58948:    // 48 8b c4 : mov rbp, rsp
-    case 0xc18b48:    // 48 8b c1 : mov rax, rcx
-    case 0xc48b48:    // 48 8b c4 : mov rax, rsp
-    case 0xd9f748:    // 48 f7 d9 : neg rcx
-    case 0xd12b48:    // 48 2b d1 : sub rdx, rcx
-    case 0x07c1f6:    // f6 c1 07 : test cl, 0x7
-    case 0xc98548:    // 48 85 C9 : test rcx, rcx
-    case 0xd28548:    // 48 85 d2 : test rdx, rdx
-    case 0xc0854d:    // 4d 85 c0 : test r8, r8
-    case 0xc2b60f:    // 0f b6 c2 : movzx eax, dl
-    case 0xc03345:    // 45 33 c0 : xor r8d, r8d
-    case 0xc93345:    // 45 33 c9 : xor r9d, r9d
-    case 0xdb3345:    // 45 33 DB : xor r11d, r11d
-    case 0xd98b4c:    // 4c 8b d9 : mov r11, rcx
-    case 0xd28b4c:    // 4c 8b d2 : mov r10, rdx
-    case 0xc98b4c:    // 4C 8B C9 : mov r9, rcx
-    case 0xc18b4c:    // 4C 8B C1 : mov r8, rcx
-    case 0xd2b60f:    // 0f b6 d2 : movzx edx, dl
-    case 0xca2b48:    // 48 2b ca : sub rcx, rdx
-    case 0x10b70f:    // 0f b7 10 : movzx edx, WORD PTR [rax]
-    case 0xc00b4d:    // 3d 0b c0 : or r8, r8
-    case 0xc08b41:    // 41 8b c0 : mov eax, r8d
-    case 0xd18b48:    // 48 8b d1 : mov rdx, rcx
-    case 0xdc8b4c:    // 4c 8b dc : mov r11, rsp
-    case 0xd18b4c:    // 4c 8b d1 : mov r10, rcx
-    case 0xE0E483:    // 83 E4 E0 : and esp, 0xFFFFFFE0
+    case 0x07c1f6:  // f6 c1 07 : test cl, 0x7
+    case 0x10b70f:  // 0f b7 10 : movzx edx, word ptr [rax]
+    case 0xc00b4d:  // 4d 0b c0 : or r8, r8
+    case 0xc03345:  // 45 33 c0 : xor r8d, r8d
+    case 0xc08548:  // 48 85 c0 : test rax, rax
+    case 0xc0854d:  // 4d 85 c0 : test r8, r8
+    case 0xc08b41:  // 41 8b c0 : mov eax, r8d
+    case 0xc0ff48:  // 48 ff c0 : inc rax
+    case 0xc0ff49:  // 49 ff c0 : inc r8
+    case 0xc18b41:  // 41 8b c1 : mov eax, r9d
+    case 0xc18b48:  // 48 8b c1 : mov rax, rcx
+    case 0xc18b4c:  // 4c 8b c1 : mov r8, rcx
+    case 0xc1ff48:  // 48 ff c1 : inc rcx
+    case 0xc1ff49:  // 49 ff c1 : inc r9
+    case 0xc28b41:  // 41 8b c2 : mov eax, r10d
+    case 0xc2b60f:  // 0f b6 c2 : movzx eax, dl
+    case 0xc2ff48:  // 48 ff c2 : inc rdx
+    case 0xc2ff49:  // 49 ff c2 : inc r10
+    case 0xc38b41:  // 41 8b c3 : mov eax, r11d
+    case 0xc3ff48:  // 48 ff c3 : inc rbx
+    case 0xc3ff49:  // 49 ff c3 : inc r11
+    case 0xc48b41:  // 41 8b c4 : mov eax, r12d
+    case 0xc48b48:  // 48 8b c4 : mov rax, rsp
+    case 0xc4ff49:  // 49 ff c4 : inc r12
+    case 0xc5ff49:  // 49 ff c5 : inc r13
+    case 0xc6ff48:  // 48 ff c6 : inc rsi
+    case 0xc6ff49:  // 49 ff c6 : inc r14
+    case 0xc7ff48:  // 48 ff c7 : inc rdi
+    case 0xc7ff49:  // 49 ff c7 : inc r15
+    case 0xc93345:  // 45 33 c9 : xor r9d, r9d
+    case 0xc98548:  // 48 85 c9 : test rcx, rcx
+    case 0xc9854d:  // 4d 85 c9 : test r9, r9
+    case 0xc98b4c:  // 4c 8b c9 : mov r9, rcx
+    case 0xca2b48:  // 48 2b ca : sub rcx, rdx
+    case 0xd12b48:  // 48 2b d1 : sub rdx, rcx
+    case 0xd18b48:  // 48 8b d1 : mov rdx, rcx
+    case 0xd18b4c:  // 4c 8b d1 : mov r10, rcx
+    case 0xd28548:  // 48 85 d2 : test rdx, rdx
+    case 0xd2854d:  // 4d 85 d2 : test r10, r10
+    case 0xd28b4c:  // 4c 8b d2 : mov r10, rdx
+    case 0xd2b60f:  // 0f b6 d2 : movzx edx, dl
+    case 0xd98b4c:  // 4c 8b d9 : mov r11, rcx
+    case 0xd9f748:  // 48 f7 d9 : neg rcx
+    case 0xdb3345:  // 45 33 db : xor r11d, r11d
+    case 0xdb8548:  // 48 85 db : test rbx, rbx
+    case 0xdb854d:  // 4d 85 db : test r11, r11
+    case 0xdc8b4c:  // 4c 8b dc : mov r11, rsp
+    case 0xe0e483:  // 83 e4 e0 : and esp, 0xffffffe0
+    case 0xe48548:  // 48 85 e4 : test rsp, rsp
+    case 0xe4854d:  // 4d 85 e4 : test r12, r12
+    case 0xe58948:  // 48 89 e5 : mov rbp, rsp
+    case 0xed8548:  // 48 85 ed : test rbp, rbp
+    case 0xed854d:  // 4d 85 ed : test r13, r13
+    case 0xf6854d:  // 4d 85 f6 : test r14, r14
+    case 0xff854d:  // 4d 85 ff : test r15, r15
       return 3;
 
-    case 0xec8348:    // 48 83 ec XX : sub rsp, XX
-    case 0xf88349:    // 49 83 f8 XX : cmp r8, XX
-    case 0x588948:    // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
+    case 0x245489:  // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx
+    case 0x428d44:  // 44 8d 42 XX : lea r8d , [rdx + XX]
+    case 0x588948:  // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx
+    case 0xec8348:  // 48 83 ec XX : sub rsp, XX
+    case 0xf88349:  // 49 83 f8 XX : cmp r8, XX
       return 4;
 
+    case 0x246483:  // 83 64 24 XX YY :   and    DWORD PTR [rsp+XX], YY
+      return 5;
+
+    case 0x788166:  // 66 81 78 XX YY YY  cmp WORD PTR [rax+XX], YY YY
+    case 0x798166:  // 66 81 79 XX YY YY  cmp WORD PTR [rcx+XX], YY YY
+    case 0x7a8166:  // 66 81 7a XX YY YY  cmp WORD PTR [rdx+XX], YY YY
+    case 0x7b8166:  // 66 81 7b XX YY YY  cmp WORD PTR [rbx+XX], YY YY
+    case 0x7e8166:  // 66 81 7e XX YY YY  cmp WORD PTR [rsi+XX], YY YY
+    case 0x7f8166:  // 66 81 7f XX YY YY  cmp WORD PTR [rdi+XX], YY YY
+      return 6;
+
     case 0xec8148:    // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX
       return 7;
 
+      // clang-format off
+    case 0x788141:  // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+XX], YY YY YY YY
+    case 0x798141:  // 41 81 79 XX YY YY YY YY : cmp DWORD PTR [r9+XX], YY YY YY YY
+    case 0x7a8141:  // 41 81 7a XX YY YY YY YY : cmp DWORD PTR [r10+XX], YY YY YY YY
+    case 0x7b8141:  // 41 81 7b XX YY YY YY YY : cmp DWORD PTR [r11+XX], YY YY YY YY
+    case 0x7c8141:  // 41 81 7c ... : cmp DWORD PTR [r12+XX], YY YY YY YY (NOTE(review): ModRM 7c implies a SIB byte, i.e. 41 81 7c 24 XX YY YY YY YY = 9 bytes -- verify)
+    case 0x7d8141:  // 41 81 7d XX YY YY YY YY : cmp DWORD PTR [r13+XX], YY YY YY YY
+    case 0x7e8141:  // 41 81 7e XX YY YY YY YY : cmp DWORD PTR [r14+XX], YY YY YY YY
+    case 0x7f8141:  // 41 81 7f XX YY YY YY YY : cmp DWORD PTR [r15+XX], YY YY YY YY
+    case 0x247c81:  // 81 7c 24 XX YY YY YY YY : cmp DWORD PTR [rsp+XX], YY YY YY YY
+      return 8;
+      // clang-format on
+
     case 0x058b48:    // 48 8b 05 XX XX XX XX :
                       //   mov rax, QWORD PTR [rip + XXXXXXXX]
     case 0x25ff48:    // 48 ff 25 XX XX XX XX :
@@ -640,8 +737,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x24548948:  // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx
     case 0x244c894c:  // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9
     case 0x2444894c:  // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8
+    case 0x244c8944:  // 44 89 4c 24 XX   mov DWORD PTR [rsp + XX], r9d
+    case 0x24448944:  // 44 89 44 24 XX   mov DWORD PTR [rsp + XX], r8d
+    case 0x246c8d48:  // 48 8d 6c 24 XX : lea rbp, [rsp + XX]
       return 5;
-    case 0x24648348:  // 48 83 64 24 XX : and QWORD PTR [rsp + XX], YY
+    case 0x24648348:  // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY
       return 6;
   }
 
@@ -655,6 +755,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
     case 0x458B:  // 8B 45 XX : mov eax, dword ptr [ebp + XX]
     case 0x5D8B:  // 8B 5D XX : mov ebx, dword ptr [ebp + XX]
     case 0x7D8B:  // 8B 7D XX : mov edi, dword ptr [ebp + XX]
+    case 0x758B:  // 8B 75 XX : mov esi, dword ptr [ebp + XX]
     case 0xEC83:  // 83 EC XX : sub esp, XX
     case 0x75FF:  // FF 75 XX : push dword ptr [ebp + XX]
       return 3;

``````````

</details>


https://github.com/llvm/llvm-project/pull/69490


More information about the llvm-commits mailing list