首页 > 解决方案 > 为什么静态局部变量的 MSVC 线程安全初始化使用 TLS


在 VS 2015 及更高版本中,使用 /Zc:threadSafeInit 时,静态局部变量的初始化是线程安全的,但是 MSDN 说:

线程安全的静态局部变量在内部使用线程局部存储 (TLS),以便在静态变量已经初始化时提供高效的执行。

如果使用 LoadLibrary 动态加载 DLL,Windows XP 会忽略 .tls 段,因此:

  1. 为什么使用 TLS 以及它是如何高效工作的?
  2. std::call_once 是否使用 TLS ?
  3. 如何在没有 TLS 的情况下实现线程安全初始化?

引用的链接在这里:Thread-safe Local Static Initialization

此功能的实现依赖于 Windows Vista 及更高版本操作系统中的 Windows 操作系统支持功能。Windows XP、Windows Server 2003 和较早的操作系统没有此支持,因此它们无法获得效率优势。


class AA
    int m_a = 1;

AA* getAA()
    static AA a;
    return &a;

int main()
    AA* pa = getAA();
    return 0;

使用 /Zc:threadSafeInit 时,getAA 函数在 windbg 中的反汇编为:

008c1000 55              push    ebp
008c1001 8bec            mov     ebp,esp
008c1003 64a12c000000    mov     eax,dword ptr fs:[0000002Ch]
008c1009 8b08            mov     ecx,dword ptr [eax]
008c100b 8b15b8338c00    mov     edx,dword ptr [testStatic!__favor+0x4 (008c33b8)]
008c1011 3b9104000000    cmp     edx,dword ptr [ecx+4]
008c1017 7e2d            jle     testStatic!getAA+0x46 (008c1046)
008c1019 68b8338c00      push    offset testStatic!__favor+0x4 (008c33b8)
008c101e e809020000      call    testStatic!_Init_thread_header (008c122c)
008c1023 83c404          add     esp,4
008c1026 833db8338c00ff  cmp     dword ptr [testStatic!__favor+0x4 (008c33b8)],0FFFFFFFFh
008c102d 7517            jne     testStatic!getAA+0x46 (008c1046)
008c102f b9bc338c00      mov     ecx,offset testStatic!a (008c33bc)
008c1034 e817000000      call    testStatic!AA::AA (008c1050)
008c1039 68b8338c00      push    offset testStatic!__favor+0x4 (008c33b8)
008c103e e89f010000      call    testStatic!_Init_thread_footer (008c11e2)
008c1043 83c404          add     esp,4
008c1046 b8bc338c00      mov     eax,offset testStatic!a (008c33bc)
008c104b 5d              pop     ebp
008c104c c3              ret


不使用该选项(即 /Zc:threadSafeInit-)时,getAA 函数的反汇编为:

010e1000 55              push    ebp
010e1001 8bec            mov     ebp,esp
010e1003 a180330e01      mov     eax,dword ptr [testStatic!a+0x4 (010e3380)]
010e1008 83e001          and     eax,1
010e100b 7519            jne     testStatic!getAA+0x26 (010e1026)
010e100d 8b0d80330e01    mov     ecx,dword ptr [testStatic!a+0x4 (010e3380)]
010e1013 83c901          or      ecx,1
010e1016 890d80330e01    mov     dword ptr [testStatic!a+0x4 (010e3380)],ecx
010e101c b97c330e01      mov     ecx,offset testStatic!a (010e337c)
010e1021 e80a000000      call    testStatic!AA::AA (010e1030)
010e1026 b87c330e01      mov     eax,offset testStatic!a (010e337c)
010e102b 5d              pop     ebp
010e102c c3              ret

标签: c++, ssl, visual-c++, thread-safety


为什么静态局部变量的 MSVC 线程安全初始化使用 TLS

TLS 仅用作优化。对于静态初始化,它使用单个全局临界区,或者在可用时使用 SRW 锁。

您可以在 VC++ CRT 源代码中看到它:crt/src/vcruntime/thread_safe_statics.cpp

它是开源的,可作为 MSVC 安装的一部分使用。我在此处包含相关片段以供参考:

// thread_safe_statics.cpp
//      Copyright (c) Microsoft Corporation. All rights reserved.
// Helper functions used by thread-safe static initialization.
#ifdef _M_CEE
    #error This file cannot be built as managed

#include <vcstartup_internal.h>
#include <vcruntime_internal.h>
#include <limits.h>

static DWORD const xp_timeout = 100; // ms
static int const uninitialized = 0;
static int const being_initialized = -1;
static int const epoch_start = INT_MIN;

// Access to these variables is guarded in the below functions.  They may only
// be modified while the lock is held.  _Tss_epoch is readable from user
// code and is read without taking the lock.
extern "C"
    int _Init_global_epoch = epoch_start;
    __declspec(thread) int _Init_thread_epoch = epoch_start;

// On Vista or newer, the native CONDITION_VARIABLE type is used.  On XP, we use a simple
// Windows event.  This is not safe to use as a complete condition variable, but for the purposes
// of this feature the event is sufficient but not optimal.  See the code in _Tss_wait
// below.
// For Windows OS components:  The OS supports APISets downlevel to Windows 7,
// and OS components that run downlevel to Windows 7 may build against APISets.
// However, these components cannot use CONDITION_VARIABLE directly because it
// is not available via APISets until Windows 8.  Thus, for Windows OS components,
// we use the "ancient" code path and first try the APISet and then fall back to
// kernel32.dll.
#if defined _SCRT_ENCLAVE_BUILD || defined _CRT_APP || defined _CRT_WINDOWS_USE_VISTA_TSS \
    || (!defined _CRT_WINDOWS && (defined _ONECORE || defined _M_ARM || defined _M_ARM64))

static CONDITION_VARIABLE g_tss_cv;

static SRWLOCK g_tss_srw;
static HANDLE             g_tss_event;
static CRITICAL_SECTION   g_tss_mutex;
static decltype(SleepConditionVariableCS)* g_sleep_condition_variable_cs;
static decltype(WakeAllConditionVariable)* g_wake_all_condition_variable;

static void __cdecl __scrt_initialize_thread_safe_statics_platform_specific() noexcept
    // This can fail pre-Vista and that is ignored.
    InitializeCriticalSectionAndSpinCount(&g_tss_mutex, 4000);

    // CONDITION_VARIABLE is available via this APISet starting on Windows 8.
    HMODULE kernel_dll = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
    if (kernel_dll == nullptr)
        kernel_dll = GetModuleHandleW(L"kernel32.dll");

    if (kernel_dll == nullptr)

    #define GET_PROC_ADDRESS(m, f) reinterpret_cast<decltype(f)*>(GetProcAddress(m, _CRT_STRINGIZE(f)))

    auto const sleep_condition_variable_cs = GET_PROC_ADDRESS(kernel_dll, SleepConditionVariableCS);
    auto const wake_all_condition_variable = GET_PROC_ADDRESS(kernel_dll, WakeAllConditionVariable);


    if (sleep_condition_variable_cs && wake_all_condition_variable)
        g_sleep_condition_variable_cs = sleep_condition_variable_cs;
        g_wake_all_condition_variable = wake_all_condition_variable;
        g_tss_event = CreateEventW(NULL, TRUE, FALSE, NULL);
        if (g_tss_event == nullptr)

// Terminator for synchronization data structures.
static void __cdecl __scrt_uninitialize_thread_safe_statics() noexcept
    if (g_tss_event != nullptr)

// Initializer for synchronization data structures.
static int __cdecl __scrt_initialize_thread_safe_statics() noexcept

    // If CRT initialization was skipped then we should initialize the atexit tables.
    // This will only be needed when using a managed DLL with /NOENTRY specified.
    if (!__scrt_initialize_onexit_tables(__scrt_module_type::dll))
    return 0;

_CRTALLOC(".CRT$XIC") static _PIFV __scrt_initialize_tss_var = __scrt_initialize_thread_safe_statics;

// Helper functions for accessing the mutex and condition variable.  Can be replaced with
// more suitable data structures provided by the CRT, preferably ones that use the most
// efficient synchronization primitives available on the platform.
// This is not intended to be a recursive lock.
extern "C" void __cdecl _Init_thread_lock()

extern "C" void __cdecl _Init_thread_unlock()

// Wait on the condition variable.  In the XP implementation using only a Windows event
// we can't guarantee that we'll ever actually receive the notification signal, so we
// must use a non-infinite timeout.  This is not optimal: we may wake up early if the
// initializer is long-running, or we may miss the signal and not wake up until the
// timeout expires.  The signal may be missed because the sleeping threads may be
// stolen by the kernel to service an APC, or due to the race condition between the
// unlock call and the WaitForSingleObject call.
extern "C" void __cdecl _Init_thread_wait(DWORD const timeout)
    SleepConditionVariableSRW(&g_tss_cv, &g_tss_srw, timeout, 0);
    if (g_sleep_condition_variable_cs)
        // Vista+ code is first because it's most likely
        g_sleep_condition_variable_cs(&g_tss_cv, &g_tss_mutex, timeout);

    _ASSERT(timeout != INFINITE);
    WaitForSingleObjectEx(g_tss_event, timeout, FALSE);

extern "C" void __cdecl _Init_thread_notify()
    if (g_wake_all_condition_variable)
        // Vista+ code is first because it's most likely


// Control access to the initialization expression.  Only one thread may leave
// this function before the variable has completed initialization, this thread
// will perform initialization.  All other threads are blocked until the
// initialization completes or fails due to an exception.
extern "C" void __cdecl _Init_thread_header(int* const pOnce) noexcept

    if (*pOnce == uninitialized)
        *pOnce = being_initialized;
        while (*pOnce == being_initialized)
            // Timeout can be replaced with an infinite wait when XP support is
            // removed or the XP-based condition variable is sophisticated enough
            // to guarantee all waiting threads will be woken when the variable is
            // signalled.

            if (*pOnce == uninitialized)
                *pOnce = being_initialized;
        _Init_thread_epoch = _Init_global_epoch;


// Abort processing of the initializer due to an exception.  Reset the state
// to uninitialized and release waiting threads (one of which will take over
// initialization, any remaining will again sleep).
extern "C" void __cdecl _Init_thread_abort(int* const pOnce) noexcept
    *pOnce = uninitialized;

// Called by the thread that completes initialization of a variable.
// Increment the global and per thread counters, mark the variable as
// initialized, and release waiting threads.
extern "C" void __cdecl _Init_thread_footer(int* const pOnce) noexcept
    *pOnce = _Init_global_epoch;
    _Init_thread_epoch = _Init_global_epoch;

需要锁的原因源于 ISO C++ 标准([stmt.dcl]/4),它要求块作用域的 static 局部变量以线程安全的方式恰好初始化一次(MSDN 上关于 /Zc:threadSafeInit 的文章也解释了这一点,并说明可以通过该选项作为编译器扩展来放宽这一要求)。
