[llvm] [X86AsmParser] Check displacement overflow (PR #75747)

via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 17 11:09:53 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-mc

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

A displacement is an 8-, 16-, or 32-bit value.
LLVM integrated assembler silently encodes an out-of-range displacement.

GNU assembler checks the displacement and may report a warning or error
(error is for 64-bit addressing, done as part of
https://sourceware.org/PR10636).
```
movq 0x80000000(%rip), %rax
Error: 0x80000000 out of range of signed 32bit displacement

movq -0x080000001(%rax), %rax
Error: 0xffffffff7fffffff out of range of signed 32bit displacement

movl 0x100000001(%eax), %eax
Warning: 0x100000001 shortened to 0x1
```

For 32-bit addressing, GNU assembler gives no diagnostic when the
displacement is within [-2**32,2**32). 16-bit addressing is similar.
```
movl 0xffffffff(%eax), %eax  # no diagnostic
movl -0xffffffff(%eax), %eax # no diagnostic
```

Supporting a larger range is probably because wraparound using a large
constant is more reasonable. E.g. Linux kernel arch/x86/kernel/head_32.S
has `leal -__PAGE_OFFSET(%ecx),%esp` where `__PAGE_OFFSET` is
0xc0000000.

This patch implements a similar behavior.


---
Full diff: https://github.com/llvm/llvm-project/pull/75747.diff


5 Files Affected:

- (modified) llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp (+27) 
- (added) llvm/test/MC/X86/displacement-overflow.s (+36) 
- (modified) llvm/test/MC/X86/x86-64.s (+26-26) 
- (modified) llvm/test/MC/X86/x86_64-asm-match.s (+3-3) 
- (modified) llvm/test/MC/X86/x86_64-encoding.s (+3-3) 


``````````diff
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e78d160564601c..ef622e08606cbf 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3062,6 +3062,33 @@ bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
                                       ErrMsg))
     return Error(BaseLoc, ErrMsg);
 
+  // If the displacement is a constant, check overflows. For 64-bit addressing,
+  // gas requires isInt<32> and otherwise reports an error. For others, gas
+  // reports a warning and allows a wider range. E.g. gas allows
+  // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses
+  // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000).
+  if (BaseReg || IndexReg) {
+    if (auto CE = dyn_cast<MCConstantExpr>(Disp)) {
+      auto Imm = CE->getValue();
+      bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) ||
+                  X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg);
+      bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg);
+      if (Is64) {
+        if (!isInt<32>(Imm))
+          return Error(BaseLoc, "displacement " + Twine(Imm) +
+                                    " is not within [-2147483648, 2147483647]");
+      } else if (!Is16) {
+        if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
+          Warning(BaseLoc,
+                  "displacement " + Twine(Imm) + " shortened to signed 32-bit");
+        }
+      } else if (!isUInt<16>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) {
+        Warning(BaseLoc,
+                "displacement " + Twine(Imm) + " shortened to signed 16-bit");
+      }
+    }
+  }
+
   if (SegReg || BaseReg || IndexReg)
     Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                              BaseReg, IndexReg, Scale, StartLoc,
diff --git a/llvm/test/MC/X86/displacement-overflow.s b/llvm/test/MC/X86/displacement-overflow.s
new file mode 100644
index 00000000000000..626003a29109ea
--- /dev/null
+++ b/llvm/test/MC/X86/displacement-overflow.s
@@ -0,0 +1,36 @@
+# RUN: not llvm-mc -triple=x86_64 %s 2>&1 | FileCheck %s --check-prefixes=CHECK,64 --implicit-check-not=error: --implicit-check-not=warning:
+# RUN: llvm-mc -triple=i686 --defsym A16=1 %s 2>&1 | FileCheck %s --check-prefixes=CHECK,32 --implicit-check-not=error: --implicit-check-not=warning:
+
+.ifndef A16
+movq 0x80000000-1(%rip), %rax
+leaq -0x80000000(%rip), %rax
+
+# 64: [[#@LINE+1]]:17: error: displacement 2147483648 is not within [-2147483648, 2147483647]
+movq 0x80000000(%rip), %rax
+
+# 64: [[#@LINE+1]]:18: error: displacement -2147483649 is not within [-2147483648, 2147483647]
+leaq -0x80000001(%rip), %rax
+.endif
+
+movl 0xffffffff(%eax), %eax
+leal -0xffffffff(%eax), %eax
+
+# CHECK: [[#@LINE+1]]:19: warning: displacement 4294967296 shortened to signed 32-bit
+movl 0xffffffff+1(%eax), %eax
+
+# CHECK: [[#@LINE+1]]:20: warning: displacement -4294967296 shortened to signed 32-bit
+leal -0xffffffff-1(%eax), %eax
+
+{disp8} leal 0x100(%ebx), %eax
+{disp8} leal -0x100(%ebx), %eax
+
+.ifdef A16
+.code16
+movw $0, 0xffff(%bp)
+movw $0, -0xffff(%si)
+
+# 32: [[#@LINE+1]]:19: warning: displacement 65536 shortened to signed 16-bit
+movw $0, 0xffff+1(%bp)
+# 32: [[#@LINE+1]]:20: warning: displacement -65536 shortened to signed 16-bit
+movw $0, -0xffff-1(%si)
+.endif
diff --git a/llvm/test/MC/X86/x86-64.s b/llvm/test/MC/X86/x86-64.s
index 67b962c563eb57..108d1220107e33 100644
--- a/llvm/test/MC/X86/x86-64.s
+++ b/llvm/test/MC/X86/x86-64.s
@@ -346,11 +346,11 @@ fnstsw %ax
 
 // rdar://8431880
 // CHECK: rclb	%bl
-// CHECK: rcll	3735928559(%ebx,%ecx,8)
+// CHECK: rcll	2125315823(%ebx,%ecx,8)
 // CHECK: rcrl	%ecx
 // CHECK: rcrl	305419896
 rcl	%bl
-rcll	0xdeadbeef(%ebx,%ecx,8)
+rcll	0x7eadbeef(%ebx,%ecx,8)
 rcr	%ecx
 rcrl	0x12345678
 
@@ -573,9 +573,9 @@ leaq	8(%rax), %rsi
 // CHECK: encoding: [0x48,0x8d,0x70,0x08]
 
 
-cvttpd2dq	0xdeadbeef(%ebx,%ecx,8),%xmm5
-// CHECK: cvttpd2dq	3735928559(%ebx,%ecx,8), %xmm5
-// CHECK: encoding: [0x67,0x66,0x0f,0xe6,0xac,0xcb,0xef,0xbe,0xad,0xde]
+cvttpd2dq	0x7eadbeef(%ebx,%ecx,8),%xmm5
+// CHECK: cvttpd2dq	2125315823(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0x67,0x66,0x0f,0xe6,0xac,0xcb,0xef,0xbe,0xad,0x7e]
 
 // rdar://8490728 - llvm-mc rejects 'movmskpd'
 movmskpd	%xmm6, %rax
@@ -906,8 +906,8 @@ xchgl   %ecx, 368(%rax)
 // CHECK: xchgl	%ecx, 368(%rax)
 
 // rdar://8407548
-xchg	0xdeadbeef(%rbx,%rcx,8),%bl
-// CHECK: xchgb	%bl, 3735928559(%rbx,%rcx,8)
+xchg	0x7fffffff(%rbx,%rcx,8),%bl
+// CHECK: xchgb	%bl, 2147483647(%rbx,%rcx,8)
 
 
 
@@ -1112,7 +1112,7 @@ mov %gs, (%rsi)  // CHECK: movw	%gs, (%rsi) # encoding: [0x8c,0x2e]
 //CHECK: divb	%bl
 //CHECK: divw	%bx
 //CHECK: divl	%ecx
-//CHECK: divl	3735928559(%ebx,%ecx,8)
+//CHECK: divl	2125315823(%ebx,%ecx,8)
 //CHECK: divl	69
 //CHECK: divl	32493
 //CHECK: divl	3133065982
@@ -1120,7 +1120,7 @@ mov %gs, (%rsi)  // CHECK: movw	%gs, (%rsi) # encoding: [0x8c,0x2e]
 //CHECK: idivb	%bl
 //CHECK: idivw	%bx
 //CHECK: idivl	%ecx
-//CHECK: idivl	3735928559(%ebx,%ecx,8)
+//CHECK: idivl	2125315823(%ebx,%ecx,8)
 //CHECK: idivl	69
 //CHECK: idivl	32493
 //CHECK: idivl	3133065982
@@ -1128,7 +1128,7 @@ mov %gs, (%rsi)  // CHECK: movw	%gs, (%rsi) # encoding: [0x8c,0x2e]
 	div	%bl,%al
 	div	%bx,%ax
 	div	%ecx,%eax
-	div	0xdeadbeef(%ebx,%ecx,8),%eax
+	div	0x7eadbeef(%ebx,%ecx,8),%eax
 	div	0x45,%eax
 	div	0x7eed,%eax
 	div	0xbabecafe,%eax
@@ -1136,7 +1136,7 @@ mov %gs, (%rsi)  // CHECK: movw	%gs, (%rsi) # encoding: [0x8c,0x2e]
 	idiv	%bl,%al
 	idiv	%bx,%ax
 	idiv	%ecx,%eax
-	idiv	0xdeadbeef(%ebx,%ecx,8),%eax
+	idiv	0x7eadbeef(%ebx,%ecx,8),%eax
 	idiv	0x45,%eax
 	idiv	0x7eed,%eax
 	idiv	0xbabecafe,%eax
@@ -1510,9 +1510,9 @@ vmovd %xmm0, %eax
 vmovd %xmm0, %rax
 vmovq %xmm0, %rax
 
-// CHECK: seto 3735928559(%r10,%r9,8)
-// CHECK:  encoding: [0x43,0x0f,0x90,0x84,0xca,0xef,0xbe,0xad,0xde]
-	seto 0xdeadbeef(%r10,%r9,8)
+// CHECK: seto 2125315823(%r10,%r9,8)
+// CHECK:  encoding: [0x43,0x0f,0x90,0x84,0xca,0xef,0xbe,0xad,0x7e]
+	seto 0x7eadbeef(%r10,%r9,8)
 
 // CHECK: 	monitorx
 // CHECK:  encoding: [0x0f,0x01,0xfa]
@@ -1550,9 +1550,9 @@ vmovq %xmm0, %rax
 // CHECK:  encoding: [0x47,0x89,0x3c,0x3f]
 movl %r15d, (%r15,%r15)
 
-// CHECK: nopq	3735928559(%rbx,%rcx,8)
-// CHECK:  encoding: [0x48,0x0f,0x1f,0x84,0xcb,0xef,0xbe,0xad,0xde]
-nopq	0xdeadbeef(%rbx,%rcx,8)
+// CHECK: nopq	2125315823(%rbx,%rcx,8)
+// CHECK:  encoding: [0x48,0x0f,0x1f,0x84,0xcb,0xef,0xbe,0xad,0x7e]
+nopq	0x7eadbeef(%rbx,%rcx,8)
 
 // CHECK: nopq	%rax
 // CHECK:  encoding: [0x48,0x0f,0x1f,0xc0]
@@ -1562,17 +1562,17 @@ nopq	%rax
 // CHECK: encoding: [0xf3,0x0f,0xc7,0xf8]
 rdpid %rax
 
-// CHECK: ptwritel 3735928559(%rbx,%rcx,8)
-// CHECK:  encoding: [0xf3,0x0f,0xae,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-ptwritel 0xdeadbeef(%rbx,%rcx,8)
+// CHECK: ptwritel 2125315823(%rbx,%rcx,8)
+// CHECK:  encoding: [0xf3,0x0f,0xae,0xa4,0xcb,0xef,0xbe,0xad,0x7e]
+ptwritel 0x7eadbeef(%rbx,%rcx,8)
 
 // CHECK: ptwritel %eax
 // CHECK:  encoding: [0xf3,0x0f,0xae,0xe0]
 ptwritel %eax
 
-// CHECK: ptwriteq 3735928559(%rbx,%rcx,8)
-// CHECK:  encoding: [0xf3,0x48,0x0f,0xae,0xa4,0xcb,0xef,0xbe,0xad,0xde]
-ptwriteq 0xdeadbeef(%rbx,%rcx,8)
+// CHECK: ptwriteq 2125315823(%rbx,%rcx,8)
+// CHECK:  encoding: [0xf3,0x48,0x0f,0xae,0xa4,0xcb,0xef,0xbe,0xad,0x7e]
+ptwriteq 0x7eadbeef(%rbx,%rcx,8)
 
 // CHECK: ptwriteq %rax
 // CHECK:  encoding: [0xf3,0x48,0x0f,0xae,0xe0]
@@ -1586,9 +1586,9 @@ wbnoinvd
 // CHECK:  encoding: [0x0f,0x1c,0x40,0x04]
 cldemote 4(%rax)
 
-// CHECK: cldemote 3735928559(%rbx,%rcx,8)
-// CHECK:  encoding: [0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0xde]
-cldemote 0xdeadbeef(%rbx,%rcx,8)
+// CHECK: cldemote 2125315823(%rbx,%rcx,8)
+// CHECK:  encoding: [0x0f,0x1c,0x84,0xcb,0xef,0xbe,0xad,0x7e]
+cldemote 0x7eadbeef(%rbx,%rcx,8)
 
 // CHECK: umonitor %r13
 // CHECK:  encoding: [0xf3,0x41,0x0f,0xae,0xf5]
diff --git a/llvm/test/MC/X86/x86_64-asm-match.s b/llvm/test/MC/X86/x86_64-asm-match.s
index 50f25e59f49f45..7d35ebdbb2c200 100644
--- a/llvm/test/MC/X86/x86_64-asm-match.s
+++ b/llvm/test/MC/X86/x86_64-asm-match.s
@@ -31,9 +31,9 @@
 // CHECK:   Opcode result: complete match, selecting this opcode
 // CHECK: AsmMatcher: found 2 encodings with mnemonic 'crc32l'
 // CHECK: Trying to match opcode CRC32r32r32
-// CHECK:   Matching formal operand class MCK_GR32 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rbx,IndexReg=rcx,Scale=8,Disp=3735928559,SegReg=gs): Opcode result: multiple operand mismatches, ignoring this opcode
+// CHECK:   Matching formal operand class MCK_GR32 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rbx,IndexReg=rcx,Scale=8,Disp=2125315823,SegReg=gs): Opcode result: multiple operand mismatches, ignoring this opcode
 // CHECK: Trying to match opcode CRC32r32m32
-// CHECK:   Matching formal operand class MCK_Mem32 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rbx,IndexReg=rcx,Scale=8,Disp=3735928559,SegReg=gs): match success using generic matcher
+// CHECK:   Matching formal operand class MCK_Mem32 against actual operand at index 1 (Memory: ModeSize=64,BaseReg=rbx,IndexReg=rcx,Scale=8,Disp=2125315823,SegReg=gs): match success using generic matcher
 // CHECK:   Matching formal operand class MCK_GR32 against actual operand at index 2 (Reg:ecx): match success using generic matcher
 // CHECK:   Matching formal operand class InvalidMatchClass against actual operand at index 3: actual operand index out of range
 // CHECK:   Opcode result: complete match, selecting this opcode
@@ -62,7 +62,7 @@
 pshufb    CPI1_0(%rip), %xmm1
 sha1rnds4 $1, %xmm1, %xmm2
 pinsrw    $3, %ecx, %xmm5
-crc32l    %gs:0xdeadbeef(%rbx,%rcx,8),%ecx
+crc32l    %gs:0x7eadbeef(%rbx,%rcx,8),%ecx
 maskmovdqu     %xmm0, %xmm1
 vmaskmovdqu     %xmm0, %xmm1
 
diff --git a/llvm/test/MC/X86/x86_64-encoding.s b/llvm/test/MC/X86/x86_64-encoding.s
index ff541c2d6568dc..247b1dc656bc16 100644
--- a/llvm/test/MC/X86/x86_64-encoding.s
+++ b/llvm/test/MC/X86/x86_64-encoding.s
@@ -29,9 +29,9 @@ movq	%gs:(%rdi), %rax
 // CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x43,0x04]
         crc32l	4(%rbx), %eax
 
-// CHECK: crc32l 	3735928559(%rbx,%rcx,8), %ecx
-// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0xde]
-        	crc32l   0xdeadbeef(%rbx,%rcx,8),%ecx
+// CHECK: crc32l 	2125315823(%rbx,%rcx,8), %ecx
+// CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x8c,0xcb,0xef,0xbe,0xad,0x7e]
+        crc32l   0x7eadbeef(%rbx,%rcx,8),%ecx
 
 // CHECK: crc32l 	69, %ecx
 // CHECK:  encoding: [0xf2,0x0f,0x38,0xf1,0x0c,0x25,0x45,0x00,0x00,0x00]

``````````

</details>


https://github.com/llvm/llvm-project/pull/75747


More information about the llvm-commits mailing list