首页 > 解决方案 > 为什么静态局部变量的 MSVC 线程安全初始化使用 TLS

问题描述

VS 2015以上,用/Zc:threadSafeInit,静态局部变量的初始化是线程安全的,但是MSDN说

线程安全的静态局部变量在内部使用线程局部存储 (TLS) 以在静态已经初始化时提供高效执行。

如果使用 动态加载 Dll,Windows XP 会忽略 .tls 段LoadLibrary,因此

  1. 为什么使用 TLS 以及它是如何高效工作的?
  2. std::call_once 是否使用 TLS ?
  3. 如何在没有 TLS 的情况下实现线程安全初始化?

引用的链接在这里Thread-safe Local Static Initialization

此功能的实现依赖于 Windows Vista 及更高版本操作系统中的 Windows 操作系统支持功能。Windows XP、Windows Server 2003 和较早的操作系统没有此支持,因此它们无法获得效率优势。

我的测试代码:

class AA
{
public:
    int m_a = 1;
};

AA* getAA()
{
    static AA a;
    return &a;
}

int main()
{
    AA* pa = getAA();
    return 0;
}

使用, windbg 中的/Zc:threadSafeInit函数反汇编为:getAA

008c1000 55              push    ebp
008c1001 8bec            mov     ebp,esp
008c1003 64a12c000000    mov     eax,dword ptr fs:[0000002Ch]
008c1009 8b08            mov     ecx,dword ptr [eax]
008c100b 8b15b8338c00    mov     edx,dword ptr [testStatic!__favor+0x4 (008c33b8)]
008c1011 3b9104000000    cmp     edx,dword ptr [ecx+4]
008c1017 7e2d            jle     testStatic!getAA+0x46 (008c1046)
008c1019 68b8338c00      push    offset testStatic!__favor+0x4 (008c33b8)
008c101e e809020000      call    testStatic!_Init_thread_header (008c122c)
008c1023 83c404          add     esp,4
008c1026 833db8338c00ff  cmp     dword ptr [testStatic!__favor+0x4 (008c33b8)],0FFFFFFFFh
008c102d 7517            jne     testStatic!getAA+0x46 (008c1046)
008c102f b9bc338c00      mov     ecx,offset testStatic!a (008c33bc)
008c1034 e817000000      call    testStatic!AA::AA (008c1050)
008c1039 68b8338c00      push    offset testStatic!__favor+0x4 (008c33b8)
008c103e e89f010000      call    testStatic!_Init_thread_footer (008c11e2)
008c1043 83c404          add     esp,4
008c1046 b8bc338c00      mov     eax,offset testStatic!a (008c33bc)
008c104b 5d              pop     ebp
008c104c c3              ret

并使用/Zc:threadSafeInit-(关闭功能),函数getAA反汇编为:

010e1000 55              push    ebp
010e1001 8bec            mov     ebp,esp
010e1003 a180330e01      mov     eax,dword ptr [testStatic!a+0x4 (010e3380)]
010e1008 83e001          and     eax,1
010e100b 7519            jne     testStatic!getAA+0x26 (010e1026)
010e100d 8b0d80330e01    mov     ecx,dword ptr [testStatic!a+0x4 (010e3380)]
010e1013 83c901          or      ecx,1
010e1016 890d80330e01    mov     dword ptr [testStatic!a+0x4 (010e3380)],ecx
010e101c b97c330e01      mov     ecx,offset testStatic!a (010e337c)
010e1021 e80a000000      call    testStatic!AA::AA (010e1030)
010e1026 b87c330e01      mov     eax,offset testStatic!a (010e337c)
010e102b 5d              pop     ebp
010e102c c3              ret

标签: c++sslvisual-c++thread-safety

解决方案


为什么静态局部变量的 MSVC 线程安全初始化使用 TLS

TLS 仅用作优化。对于静态初始化,它使用单个全局临界区,或者在可用时使用 SRW 锁。

您可以在 VC++ CRT 源代码中看到它crt/src/vcruntime/thread_safe_statics.cpp

它是开源的,可作为 MSVC 安装的一部分使用。我在此处包含相关片段以供参考:

//
// thread_safe_statics.cpp
//
//      Copyright (c) Microsoft Corporation. All rights reserved.
//
// Helper functions used by thread-safe static initialization.
//
#ifdef _M_CEE
    #error This file cannot be built as managed
#endif

#include <vcstartup_internal.h>
#include <vcruntime_internal.h>
#include <limits.h>

static DWORD const xp_timeout = 100; // ms
static int const uninitialized = 0;
static int const being_initialized = -1;
static int const epoch_start = INT_MIN;

// Access to these variables is guarded in the below functions.  They may only
// be modified while the lock is held.  _Tss_epoch is readable from user
// code and is read without taking the lock.
extern "C"
{
    int _Init_global_epoch = epoch_start;
    __declspec(thread) int _Init_thread_epoch = epoch_start;
}

// On Vista or newer, the native CONDITION_VARIABLE type is used.  On XP, we use a simple
// Windows event.  This is not safe to use as a complete condition variable, but for the purposes
// of this feature the event is sufficient but not optimal.  See the code in _Tss_wait
// below.
//
// For Windows OS components:  The OS supports APISets downlevel to Windows 7,
// and OS components that run downlevel to Windows 7 may build against APISets.
// However, these components cannot use CONDITION_VARIABLE directly because it
// is not available via APISets until Windows 8.  Thus, for Windows OS components,
// we use the "ancient" code path and first try the APISet and then fall back to
// kernel32.dll.
#if defined _SCRT_ENCLAVE_BUILD || defined _CRT_APP || defined _CRT_WINDOWS_USE_VISTA_TSS \
    || (!defined _CRT_WINDOWS && (defined _ONECORE || defined _M_ARM || defined _M_ARM64))
#define _USE_VISTA_THREAD_SAFE_STATICS 1
#else
#define _USE_VISTA_THREAD_SAFE_STATICS 0
#endif

static CONDITION_VARIABLE g_tss_cv;

#if _USE_VISTA_THREAD_SAFE_STATICS
static SRWLOCK g_tss_srw;
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv //
static HANDLE             g_tss_event;
static CRITICAL_SECTION   g_tss_mutex;
static decltype(SleepConditionVariableCS)* g_sleep_condition_variable_cs;
static decltype(WakeAllConditionVariable)* g_wake_all_condition_variable;

static void __cdecl __scrt_initialize_thread_safe_statics_platform_specific() noexcept
{
    // This can fail pre-Vista and that is ignored.
    InitializeCriticalSectionAndSpinCount(&g_tss_mutex, 4000);

    // CONDITION_VARIABLE is available via this APISet starting on Windows 8.
    HMODULE kernel_dll = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
    if (kernel_dll == nullptr)
    {
        kernel_dll = GetModuleHandleW(L"kernel32.dll");
    }

    if (kernel_dll == nullptr)
    {
        __scrt_fastfail(FAST_FAIL_FATAL_APP_EXIT);
    }

    #define GET_PROC_ADDRESS(m, f) reinterpret_cast<decltype(f)*>(GetProcAddress(m, _CRT_STRINGIZE(f)))

    auto const sleep_condition_variable_cs = GET_PROC_ADDRESS(kernel_dll, SleepConditionVariableCS);
    auto const wake_all_condition_variable = GET_PROC_ADDRESS(kernel_dll, WakeAllConditionVariable);

    #undef GET_PROC_ADDRESS

    if (sleep_condition_variable_cs && wake_all_condition_variable)
    {
        g_sleep_condition_variable_cs = sleep_condition_variable_cs;
        g_wake_all_condition_variable = wake_all_condition_variable;
    }
    else
    {
        g_tss_event = CreateEventW(NULL, TRUE, FALSE, NULL);
        if (g_tss_event == nullptr)
        {
            __scrt_fastfail(FAST_FAIL_FATAL_APP_EXIT);
        }
    }
}

// Terminator for synchronization data structures.
static void __cdecl __scrt_uninitialize_thread_safe_statics() noexcept
{
    DeleteCriticalSection(&g_tss_mutex);
    if (g_tss_event != nullptr)
    {
        CloseHandle(g_tss_event);
    }
}

// Initializer for synchronization data structures.
static int __cdecl __scrt_initialize_thread_safe_statics() noexcept
{
    __scrt_initialize_thread_safe_statics_platform_specific();

    // If CRT initialization was skipped then we should initialize the atexit tables.
    // This will only be needed when using a managed DLL with /NOENTRY specified.
    if (!__scrt_initialize_onexit_tables(__scrt_module_type::dll))
    {
        __scrt_fastfail(FAST_FAIL_FATAL_APP_EXIT);
    }
    atexit(__scrt_uninitialize_thread_safe_statics);
    return 0;
}

_CRTALLOC(".CRT$XIC") static _PIFV __scrt_initialize_tss_var = __scrt_initialize_thread_safe_statics;
#endif // _USE_VISTA_THREAD_SAFE_STATICS

// Helper functions for accessing the mutex and condition variable.  Can be replaced with
// more suitable data structures provided by the CRT, preferably ones that use the most
// efficient synchronization primitives available on the platform.
// This is not intended to be a recursive lock.
extern "C" void __cdecl _Init_thread_lock()
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    AcquireSRWLockExclusive(&g_tss_srw);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv
    EnterCriticalSection(&g_tss_mutex);
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

extern "C" void __cdecl _Init_thread_unlock()
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    ReleaseSRWLockExclusive(&g_tss_srw);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv
    LeaveCriticalSection(&g_tss_mutex);
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

// Wait on the condition variable.  In the XP implementation using only a Windows event
// we can't guarantee that we'll ever actually receive the notification signal, so we
// must use a non-infinite timeout.  This is not optimal: we may wake up early if the
// initializer is long-running, or we may miss the signal and not wake up until the
// timeout expires.  The signal may be missed because the sleeping threads may be
// stolen by the kernel to service an APC, or due to the race condition between the
// unlock call and the WaitForSingleObject call.
extern "C" void __cdecl _Init_thread_wait(DWORD const timeout)
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    SleepConditionVariableSRW(&g_tss_cv, &g_tss_srw, timeout, 0);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv //
    if (g_sleep_condition_variable_cs)
    {
        // Vista+ code is first because it's most likely
        g_sleep_condition_variable_cs(&g_tss_cv, &g_tss_mutex, timeout);
        return;
    }

    _ASSERT(timeout != INFINITE);
    _Init_thread_unlock();
    WaitForSingleObjectEx(g_tss_event, timeout, FALSE);
    _Init_thread_lock();
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

extern "C" void __cdecl _Init_thread_notify()
{
#if _USE_VISTA_THREAD_SAFE_STATICS
    WakeAllConditionVariable(&g_tss_cv);
#else // ^^^ _USE_VISTA_THREAD_SAFE_STATICS ^^^ // vvv !_USE_VISTA_THREAD_SAFE_STATICS vvv
    if (g_wake_all_condition_variable)
    {
        // Vista+ code is first because it's most likely
        g_wake_all_condition_variable(&g_tss_cv);
        return;
    }

    SetEvent(g_tss_event);
    ResetEvent(g_tss_event);
#endif // _USE_VISTA_THREAD_SAFE_STATICS
}

// Control access to the initialization expression.  Only one thread may leave
// this function before the variable has completed initialization, this thread
// will perform initialization.  All other threads are blocked until the
// initialization completes or fails due to an exception.
extern "C" void __cdecl _Init_thread_header(int* const pOnce) noexcept
{
    _Init_thread_lock();

    if (*pOnce == uninitialized)
    {
        *pOnce = being_initialized;
    }
    else
    {
        while (*pOnce == being_initialized)
        {
            // Timeout can be replaced with an infinite wait when XP support is
            // removed or the XP-based condition variable is sophisticated enough
            // to guarantee all waiting threads will be woken when the variable is
            // signalled.
            _Init_thread_wait(xp_timeout);

            if (*pOnce == uninitialized)
            {
                *pOnce = being_initialized;
                _Init_thread_unlock();
                return;
            }
        }
        _Init_thread_epoch = _Init_global_epoch;
    }

    _Init_thread_unlock();
}

// Abort processing of the initializer due to an exception.  Reset the state
// to uninitialized and release waiting threads (one of which will take over
// initialization, any remaining will again sleep).
extern "C" void __cdecl _Init_thread_abort(int* const pOnce) noexcept
{
    _Init_thread_lock();
    *pOnce = uninitialized;
    _Init_thread_unlock();
    _Init_thread_notify();
}

// Called by the thread that completes initialization of a variable.
// Increment the global and per thread counters, mark the variable as
// initialized, and release waiting threads.
extern "C" void __cdecl _Init_thread_footer(int* const pOnce) noexcept
{
    _Init_thread_lock();
    ++_Init_global_epoch;
    *pOnce = _Init_global_epoch;
    _Init_thread_epoch = _Init_global_epoch;
    _Init_thread_unlock();
    _Init_thread_notify();
}

需要锁的原因源于 ISO C++ 标准 ( [stmt.dcl]/4 ),它要求块范围的局部变量static以线程安全的方式精确初始化一次(这部分也在 MSDN 中进行了解释关于/Zc:threadSafeInit的文章,它可以作为编译器扩展来放宽这个要求)。


推荐阅读