[llvm] r227983 - Fix program crashes due to alignment exceptions generated for SSE memop instructions (PR22371).
Nadav Rotem
nrotem at apple.com
Tue Feb 3 15:42:04 PST 2015
> On Feb 3, 2015, at 9:13 AM, Sanjay Patel <spatel at rotateright.com> wrote:
>
> Author: spatel
> Date: Tue Feb 3 11:13:04 2015
> New Revision: 227983
>
> URL: http://llvm.org/viewvc/llvm-project?rev=227983&view=rev
> Log:
> Fix program crashes due to alignment exceptions generated for SSE memop instructions (PR22371).
>
> r224330 introduced a bug by misinterpreting the "FeatureVectorUAMem" bit.
> The commit log says that change did not affect anything, but that's not correct.
> That change allowed SSE instructions to have unaligned mem operands folded into
> math ops, and that's not allowed in the default specification for any SSE variant.
>
> The bug is exposed when compiling for an AVX-capable CPU that has this feature
> flag set but with AVX codegen disabled. Another mistake in r224330 was not
> adding the feature flag to all AVX CPUs; the AMD chips were excluded.
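To make the failure concrete, here is a minimal sketch of the PR22371 pattern
(hypothetical function and value names; it mirrors the SSE runs added to
fold-vex.ll below):

  define <4 x i32> @and_mem(<4 x i32>* %p, <4 x i32> %x) nounwind {
    %v = load <4 x i32>* %p, align 2   ; under-aligned for a 16-byte access
    %r = and <4 x i32> %v, %x
    ret <4 x i32> %r
  }
  ; Before this fix, llc -mcpu=corei7-avx -mattr=-avx could fold the load:
  ;   andps (%rdi), %xmm0    <-- alignment exception if %p is not 16-byte aligned
  ; Correct SSE codegen keeps the unaligned load separate:
  ;   movups (%rdi), %xmm1
  ;   andps  %xmm1, %xmm0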
>
> This is part of the fix for PR22371 ( http://llvm.org/bugs/show_bug.cgi?id=22371 ).
>
> This feature bit is SSE-specific, so I've renamed it to "FeatureSSEUnalignedMem".
> Changed the existing test case for the feature bit to reflect the new name and
> renamed the test file itself to better reflect the feature.
> Added runs to fold-vex.ll to check for the failing codegen.
>
> Note that the feature bit is not set by default on any CPU because it may require a
> configuration register setting to enable the enhanced unaligned behavior.
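For targets where the enhanced unaligned behavior is known to be enabled (for
example, via the configuration register on AMD Family 10h parts), it can still
be requested explicitly per compilation; a sketch of a RUN line using the
renamed attribute, mirroring the new test below:

  ; RUN: llc -march=x86 -mcpu=yonah -mattr=+sse-unaligned-mem < %s | FileCheck %s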
>
>
> Added:
> llvm/trunk/test/CodeGen/X86/sse-unaligned-mem-feature.ll
> Removed:
> llvm/trunk/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
> Modified:
> llvm/trunk/lib/Target/X86/X86.td
> llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> llvm/trunk/lib/Target/X86/X86Subtarget.cpp
> llvm/trunk/lib/Target/X86/X86Subtarget.h
> llvm/trunk/test/CodeGen/X86/fold-vex.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=227983&r1=227982&r2=227983&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86.td (original)
> +++ llvm/trunk/lib/Target/X86/X86.td Tue Feb 3 11:13:04 2015
> @@ -132,9 +132,9 @@ def FeatureFMA4 : SubtargetFeature<"f
> def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
> "Enable XOP instructions",
> [FeatureFMA4]>;
> -def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
> - "HasVectorUAMem", "true",
> - "Allow unaligned memory operands on vector/SIMD instructions">;
> +def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
> + "HasSSEUnalignedMem", "true",
> + "Allow unaligned memory operands with SSE instructions">;
> def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
> "Enable AES instructions",
> [FeatureSSE2]>;
> @@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : Pro
> FeatureCMPXCHG16B,
> FeatureFastUAMem,
> FeatureSlowUAMem32,
> - FeatureVectorUAMem,
> FeaturePOPCNT,
> FeatureAES,
> FeaturePCLMUL
> @@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : Proce
> FeatureCMPXCHG16B,
> FeatureFastUAMem,
> FeatureSlowUAMem32,
> - FeatureVectorUAMem,
> FeaturePOPCNT,
> FeatureAES,
> FeaturePCLMUL,
> @@ -337,7 +335,6 @@ class HaswellProc<string Name> : Process
> FeatureAVX2,
> FeatureCMPXCHG16B,
> FeatureFastUAMem,
> - FeatureVectorUAMem,
> FeaturePOPCNT,
> FeatureAES,
> FeaturePCLMUL,
> @@ -360,7 +357,6 @@ class BroadwellProc<string Name> : Proce
> FeatureAVX2,
> FeatureCMPXCHG16B,
> FeatureFastUAMem,
> - FeatureVectorUAMem,
> FeaturePOPCNT,
> FeatureAES,
> FeaturePCLMUL,
> @@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> :
> FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
> FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
> FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
> - FeatureSlowIncDec, FeatureVectorUAMem]>;
> + FeatureSlowIncDec]>;
> def : KnightsLandingProc<"knl">;
>
> // FIXME: define SKX model
> @@ -399,7 +395,7 @@ class SkylakeProc<string Name> : Process
> FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
> FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
> FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
> - FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
> + FeatureSlowIncDec, FeatureSGX]>;
> def : SkylakeProc<"skylake">;
> def : SkylakeProc<"skx">; // Legacy alias.
>
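Worth spelling out why dropping the bit from these Intel CPU models is safe for
AVX codegen: VEX-encoded instructions accept unaligned memory operands
architecturally, so the fold never depended on this bit. A sketch (hypothetical
names), compiled with -mcpu=sandybridge:

  define <4 x float> @add_mem(<4 x float>* %p, <4 x float> %x) nounwind {
    %v = load <4 x float>* %p, align 4
    %r = fadd <4 x float> %v, %x
    ret <4 x float> %r
  }
  ; AVX still folds the unaligned load:
  ;   vaddps (%rdi), %xmm0, %xmm0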
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=227983&r1=227982&r2=227983&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Feb 3 11:13:04 2015
> @@ -433,7 +433,7 @@ def alignedloadv8i64 : PatFrag<(ops nod
> // setting a feature bit in the processor (on startup, for example).
> // Opteron 10h and later implement such a feature.
> def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
> - return Subtarget->hasVectorUAMem()
> + return Subtarget->hasSSEUnalignedMem()
> || cast<LoadSDNode>(N)->getAlignment() >= 16;
> }]>;
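Stated in plain terms: after this change, memop folds a load only when it is at
least 16-byte aligned, or when the subtarget has explicitly opted in. A sketch
of the two cases (hypothetical value names):

  %a = load <4 x float>* %p, align 16  ; matches memop on every subtarget
  %b = load <4 x float>* %q, align 4   ; matches only if HasSSEUnalignedMem is set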
>
>
> Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=227983&r1=227982&r2=227983&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Tue Feb 3 11:13:04 2015
> @@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment
> IsSHLDSlow = false;
> IsUAMemFast = false;
> IsUAMem32Slow = false;
> - HasVectorUAMem = false;
> + HasSSEUnalignedMem = false;
> HasCmpxchg16b = false;
> UseLeaForSP = false;
> HasSlowDivide32 = false;
>
> Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=227983&r1=227982&r2=227983&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
> +++ llvm/trunk/lib/Target/X86/X86Subtarget.h Tue Feb 3 11:13:04 2015
> @@ -162,9 +162,9 @@ protected:
> /// True if unaligned 32-byte memory accesses are slow.
> bool IsUAMem32Slow;
>
> - /// HasVectorUAMem - True if SIMD operations can have unaligned memory
> - /// operands. This may require setting a feature bit in the processor.
> - bool HasVectorUAMem;
> + /// True if SSE operations can have unaligned memory operands.
> + /// This may require setting a configuration bit in the processor.
> + bool HasSSEUnalignedMem;
>
> /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
> /// this is true for most x86-64 chips, but not the first AMD chips.
> @@ -375,7 +375,7 @@ public:
> bool isSHLDSlow() const { return IsSHLDSlow; }
> bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
> bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
> - bool hasVectorUAMem() const { return HasVectorUAMem; }
> + bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
> bool hasCmpxchg16b() const { return HasCmpxchg16b; }
> bool useLeaForSP() const { return UseLeaForSP; }
> bool hasSlowDivide32() const { return HasSlowDivide32; }
>
> Removed: llvm/trunk/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2010-01-07-UAMemFeature.ll?rev=227982&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2010-01-07-UAMemFeature.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2010-01-07-UAMemFeature.ll (removed)
> @@ -1,11 +0,0 @@
> -; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
> -; CHECK: addps (
> -
> -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
> -target triple = "x86_64-unknown-linux-gnu"
> -
> -define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
> - %A = load <4 x float>* %P, align 4
> - %B = fadd <4 x float> %A, %In
> - ret <4 x float> %B
> -}
>
> Modified: llvm/trunk/test/CodeGen/X86/fold-vex.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-vex.ll?rev=227983&r1=227982&r2=227983&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/fold-vex.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/fold-vex.ll Tue Feb 3 11:13:04 2015
> @@ -1,12 +1,18 @@
> ; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.
>
> -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
> -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
> -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-avx | FileCheck %s --check-prefix=SSE
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
> +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefix=SSE
>
> ; No need to load unaligned operand from memory using an explicit instruction with AVX.
> ; The operand should be folded into the AND instr.
>
> +; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
> +; unless specially configured on some CPUs such as AMD Family 10H.
> +
> define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
> %in0 = load <4 x i32>* %p0, align 2
> %a = and <4 x i32> %in0, %in1
> @@ -16,5 +22,10 @@ define <4 x i32> @test1(<4 x i32>* %p0,
> ; CHECK-NOT: vmovups
> ; CHECK: vandps (%rdi), %xmm0, %xmm0
> ; CHECK-NEXT: ret
> +
> +; SSE-LABEL: @test1
> +; SSE: movups (%rdi), %xmm1
> +; SSE-NEXT: andps %xmm1, %xmm0
> +; SSE-NEXT: ret
> }
>
>
> Added: llvm/trunk/test/CodeGen/X86/sse-unaligned-mem-feature.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-unaligned-mem-feature.ll?rev=227983&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse-unaligned-mem-feature.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/sse-unaligned-mem-feature.ll Tue Feb 3 11:13:04 2015
> @@ -0,0 +1,13 @@
> +; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s
> +
> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
> + %A = load <4 x float>* %P, align 4
> + %B = fadd <4 x float> %A, %In
> + ret <4 x float> %B
> +
> +; CHECK-LABEL: @foo
> +; CHECK: addps (
> +}
>
Sanjay, can you please complete the CHECK line? You can write something like “addps (%rdi)”.
>