diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index a1f49571d..82a8a58b3 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,38 @@ +2015-07-05 Corinna Vinschen + + * miscfuncs.cc (struct pthread_wrapper_arg): Add member guardsize. + (pthread_wrapper): Set thread stack guarantee according to guardsize. + Tweak assembler code so that $rax/$eax is not required by GCC to + prepare the wrapper_arg value. + (CygwinCreateThread): Fix deadzone handling. Drop setting a "POSIX" + guardpage (aka page w/ PAGE_NOACCESS). Always use Windows guard + pages instead. On post-XP systems (providing SetThreadStackGuarantee) + always set up stack Windows-like with reserved/committed areas and + movable guard pages. Only on XP set up stack fully committed if the + guardpage size is not the default system guardpage size. + Fill out pthread_wrapper_arg::guardsize. Improve comments. + * resource.cc: Implement RLIMIT_STACK Linux-like. + (DEFAULT_STACKSIZE): New macro. + (DEFAULT_STACKGUARD): Ditto. + (rlimit_stack_guard): New muto. + (rlimit_stack): New global variable holding current RLIMIT_STACK values. + (__set_rlimit_stack): Set rlimit_stack under lock. + (__get_rlimit_stack): Initialize rlimit_stack from executable header + and return rlimit_stack values under lock. + (get_rlimit_stack): Filtering function to return useful default + stacksize from rlimit_stack.rlim_cur value. + (getrlimit): Call __get_rlimit_stack in RLIMIT_STACK case. + (setrlimit): Call __set_rlimit_stack in RLIMIT_STACK case. + * thread.cc (pthread::create): Fetch default stacksize calling + get_rlimit_stack. + (pthread_attr::pthread_attr): Fetch default guardsize calling + wincap.def_guard_page_size. + (pthread_attr_getstacksize): Fetch default stacksize calling + get_rlimit_stack. + * thread.h (PTHREAD_DEFAULT_STACKSIZE): Remove. + (PTHREAD_DEFAULT_GUARDSIZE): Remove. + (get_rlimit_stack): Declare. 
+ 2015-07-05 Corinna Vinschen * fhandler_process.cc (heap_info::heap_info): Disable fetching heap info diff --git a/winsup/cygwin/miscfuncs.cc b/winsup/cygwin/miscfuncs.cc index 7f324b946..4a7a1b858 100644 --- a/winsup/cygwin/miscfuncs.cc +++ b/winsup/cygwin/miscfuncs.cc @@ -560,6 +560,7 @@ struct pthread_wrapper_arg PBYTE stackaddr; PBYTE stackbase; PBYTE stacklimit; + ULONG guardsize; }; DWORD WINAPI @@ -592,7 +593,14 @@ pthread_wrapper (PVOID arg) The below assembler code will release the OS stack after switching to our new stack. */ wrapper_arg.stackaddr = dealloc_addr; - + /* On post-XP systems, set thread stack guarantee matching the guardsize. + Note that the guardsize is one page bigger than the guarantee. */ + if (wincap.has_set_thread_stack_guarantee () + && wrapper_arg.guardsize > wincap.def_guard_page_size ()) + { + wrapper_arg.guardsize -= wincap.page_size (); + SetThreadStackGuarantee (&wrapper_arg.guardsize); + } /* Initialize new _cygtls. */ _my_tls.init_thread (wrapper_arg.stackbase - CYGTLS_PADSIZE, (DWORD (*)(void*, void*)) wrapper_arg.func); @@ -632,7 +640,7 @@ pthread_wrapper (PVOID arg) #endif #ifdef __x86_64__ __asm__ ("\n\ - movq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\ + leaq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\ movq (%%rbx), %%r12 # Load thread func into r12 \n\ movq 8(%%rbx), %%r13 # Load thread arg into r13 \n\ movq 16(%%rbx), %%rcx # Load stackaddr into rcx \n\ @@ -652,11 +660,11 @@ pthread_wrapper (PVOID arg) # register r13 and then just call the function. 
\n\ movq %%r13, %%rcx # Move thread arg to 1st arg reg\n\ call *%%r12 # Call thread func \n" - : : [WRAPPER_ARG] "r" (&wrapper_arg), + : : [WRAPPER_ARG] "o" (wrapper_arg), [CYGTLS] "i" (CYGTLS_PADSIZE)); #else __asm__ ("\n\ - movl %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\ + leal %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\ movl (%%ebx), %%eax # Load thread func into eax \n\ movl 4(%%ebx), %%ecx # Load thread arg into ecx \n\ movl 8(%%ebx), %%edx # Load stackaddr into edx \n\ @@ -683,7 +691,7 @@ pthread_wrapper (PVOID arg) # stack in the expected spot. \n\ popl %%eax # Pop thread_func address \n\ call *%%eax # Call thread func \n" - : : [WRAPPER_ARG] "r" (&wrapper_arg), + : : [WRAPPER_ARG] "o" (wrapper_arg), [CYGTLS] "i" (CYGTLS_PADSIZE)); #endif /* pthread::thread_init_wrapper calls pthread::exit, which @@ -777,7 +785,8 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg, if (stackaddr) { - /* If the application provided the stack, just use it. */ + /* If the application provided the stack, just use it. There won't + be any stack overflow handling! */ wrapper_arg->stackaddr = (PBYTE) stackaddr; wrapper_arg->stackbase = (PBYTE) stackaddr + stacksize; } @@ -790,10 +799,8 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg, real_guardsize = roundup2 (guardsize, wincap.page_size ()); /* Add the guardsize to the stacksize */ real_stacksize += real_guardsize; - /* If we use the default Windows guardpage method, we have to take - the 2 pages dead zone into account. */ - if (real_guardsize == wincap.page_size ()) - real_stacksize += 2 * wincap.page_size (); + /* Take dead zone page into account, which always stays uncommited. */ + real_stacksize += wincap.page_size (); /* Now roundup the result to the next allocation boundary. 
*/ real_stacksize = roundup2 (real_stacksize, wincap.allocation_granularity ()); @@ -811,46 +818,63 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg, #endif if (!real_stackaddr) return NULL; - /* Set up committed region. Two cases: */ - if (real_guardsize != wincap.page_size ()) + /* Set up committed region. We have two cases: */ + if (!wincap.has_set_thread_stack_guarantee () + && real_guardsize != wincap.def_guard_page_size ()) { - /* If guardsize is set to something other than the page size, we - commit the entire stack and, if guardsize is > 0, we set up a - POSIX guardpage. We don't set up a Windows guardpage. */ - if (!VirtualAlloc (real_stackaddr, real_guardsize, MEM_COMMIT, - PAGE_NOACCESS)) + /* If guardsize is set to something other than the default guard page + size, and if we're running on Windows XP 32 bit, we commit the + entire stack, and, if guardsize is > 0, set up a guard page. */ + real_stacklimit = (PBYTE) real_stackaddr + wincap.page_size (); + if (real_guardsize + && !VirtualAlloc (real_stacklimit, real_guardsize, MEM_COMMIT, + PAGE_READWRITE | PAGE_GUARD)) goto err; - real_stacklimit = (PBYTE) real_stackaddr + real_guardsize; - if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize, + real_stacklimit += real_guardsize; + if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize + - wincap.page_size (), MEM_COMMIT, PAGE_READWRITE)) goto err; } else { - /* If guardsize is exactly the page_size, we can assume that the - application will behave Windows conformant in terms of stack usage. - We can especially assume that it never allocates more than one - page at a time (alloca/_chkstk). Therefore, this is the default - case which allows a Windows compatible stack setup with a - reserved region, a guard page, and a commited region. 
We don't - need to set up a POSIX guardpage since Windows already handles - stack overflow: Trying to extend the stack into the last three - pages of the stack results in a SEGV. - We always commit 64K here, starting with the guardpage. */ + /* Otherwise we set up the stack like the OS does, with a reserved + region, the guard pages, and a commited region. We commit the + stack commit size from the executable header, but at least + PTHREAD_STACK_MIN (64K). */ + static ULONG exe_commitsize; + + if (!exe_commitsize) + { + PIMAGE_DOS_HEADER dosheader; + PIMAGE_NT_HEADERS ntheader; + + dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL); + ntheader = (PIMAGE_NT_HEADERS) + ((PBYTE) dosheader + dosheader->e_lfanew); + exe_commitsize = ntheader->OptionalHeader.SizeOfStackCommit; + exe_commitsize = roundup2 (exe_commitsize, wincap.page_size ()); + } + ULONG commitsize = exe_commitsize; + if (commitsize > real_stacksize - real_guardsize + - wincap.page_size ()) + commitsize = real_stacksize - real_guardsize - wincap.page_size (); + else if (commitsize < PTHREAD_STACK_MIN) + commitsize = PTHREAD_STACK_MIN; real_stacklimit = (PBYTE) real_stackaddr + real_stacksize - - wincap.allocation_granularity (); - if (!VirtualAlloc (real_stacklimit, wincap.page_size (), MEM_COMMIT, - PAGE_READWRITE | PAGE_GUARD)) + - commitsize - real_guardsize; + if (!VirtualAlloc (real_stacklimit, real_guardsize, + MEM_COMMIT, PAGE_READWRITE | PAGE_GUARD)) goto err; - real_stacklimit += wincap.page_size (); - if (!VirtualAlloc (real_stacklimit, wincap.allocation_granularity () - - wincap.page_size (), MEM_COMMIT, + real_stacklimit += real_guardsize; + if (!VirtualAlloc (real_stacklimit, commitsize, MEM_COMMIT, PAGE_READWRITE)) goto err; } wrapper_arg->stackaddr = (PBYTE) real_stackaddr; wrapper_arg->stackbase = (PBYTE) real_stackaddr + real_stacksize; wrapper_arg->stacklimit = real_stacklimit; + wrapper_arg->guardsize = real_guardsize; } /* Use the STACK_SIZE_PARAM_IS_A_RESERVATION parameter so 
only the minimum size for a thread stack is reserved by the OS. Note that we diff --git a/winsup/cygwin/release/2.1.0 b/winsup/cygwin/release/2.1.0 index da484f5f4..aca69cdf3 100644 --- a/winsup/cygwin/release/2.1.0 +++ b/winsup/cygwin/release/2.1.0 @@ -1,6 +1,12 @@ What's new: ----------- +- Handle pthread stacksizes as in GLibc: Default to RLIMIT_STACK resource. + Allow to set RLIMIT_STACK via setrlimit. Default RLIMIT_STACK to value + from executable header as described on + https://msdn.microsoft.com/en-us/library/windows/desktop/ms686774.aspx + Default stacksize to 2 Megs in case RLIMIT_STACK is set to RLIM_INFINITY. + - First cut of an implementation to allow signal handlers running on an alternate signal stack. diff --git a/winsup/cygwin/resource.cc b/winsup/cygwin/resource.cc index 895ba7f33..a5a23a4d8 100644 --- a/winsup/cygwin/resource.cc +++ b/winsup/cygwin/resource.cc @@ -111,6 +111,61 @@ getrusage (int intwho, struct rusage *rusage_in) return res; } +/* Default stacksize in case RLIMIT_STACK is RLIM_INFINITY is 2 Megs with + system-dependent number of guard pages. The pthread stacksize does not + include the guardpage size, so we have to subtract the default guardpage + size. Additionally the Windows stack handling disallows to commit the + last page, so we subtract it, too. */ +#define DEFAULT_STACKSIZE (2 * 1024 * 1024) +#define DEFAULT_STACKGUARD (wincap.def_guard_page_size() + wincap.page_size ()) + +muto NO_COPY rlimit_stack_guard; +static struct rlimit rlimit_stack = { 0, RLIM_INFINITY }; + +static void +__set_rlimit_stack (const struct rlimit *rlp) +{ + rlimit_stack_guard.init ("rlimit_stack_guard")->acquire (); + rlimit_stack = *rlp; + rlimit_stack_guard.release (); +} + +static void +__get_rlimit_stack (struct rlimit *rlp) +{ + rlimit_stack_guard.init ("rlimit_stack_guard")->acquire (); + if (!rlimit_stack.rlim_cur) + { + /* Fetch the default stacksize from the executable header... 
*/ + PIMAGE_DOS_HEADER dosheader; + PIMAGE_NT_HEADERS ntheader; + + dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL); + ntheader = (PIMAGE_NT_HEADERS) ((PBYTE) dosheader + dosheader->e_lfanew); + rlimit_stack.rlim_cur = ntheader->OptionalHeader.SizeOfStackReserve; + /* ...and subtract the guardpages. */ + rlimit_stack.rlim_cur -= DEFAULT_STACKGUARD; + } + *rlp = rlimit_stack; + rlimit_stack_guard.release (); +} + +size_t +get_rlimit_stack (void) +{ + struct rlimit rl; + + __get_rlimit_stack (&rl); + /* RLIM_INFINITY doesn't make much sense. As in glibc, use an + "architecture-specific default". */ + if (rl.rlim_cur == RLIM_INFINITY) + rl.rlim_cur = DEFAULT_STACKSIZE - DEFAULT_STACKGUARD; + /* Always return at least minimum stacksize. */ + else if (rl.rlim_cur < PTHREAD_STACK_MIN) + rl.rlim_cur = PTHREAD_STACK_MIN; + return (size_t) rl.rlim_cur; +} + extern "C" int getrlimit (int resource, struct rlimit *rlp) { @@ -127,32 +182,7 @@ getrlimit (int resource, struct rlimit *rlp) case RLIMIT_AS: break; case RLIMIT_STACK: - PTEB teb; - /* 2015-06-26: Originally rlim_cur returned the size of the still - available stack area on the current stack, rlim_max the total size - of the current stack. Two problems: - - - Per POSIX, RLIMIT_STACK returns "the maximum size of the initial - thread's stack, in bytes. The implementation does not - automatically grow the stack beyond this limit". - - - With the implementation of sigaltstack, the current stack is not - necessarily the "initial thread's stack" anymore. Rather, when - called from a signal handler running on the alternate stack, - RLIMIT_STACK should return the size of the original stack. - - rlim_cur is now the size of the stack. For system-provided stacks - it's the size between DeallocationStack and StackBase. For - application-provided stacks (via pthread_attr_setstack), - DeallocationStack is NULL, but StackLimit points to the bottom - of the stack. 
- - rlim_max is set to RLIM_INFINITY since there's no hard limit - for stack sizes on Windows. */ - teb = NtCurrentTeb (); - rlp->rlim_cur = (rlim_t) teb->Tib.StackBase - - (rlim_t) (teb->DeallocationStack - ?: teb->Tib.StackLimit); + __get_rlimit_stack (rlp); break; case RLIMIT_NOFILE: rlp->rlim_cur = getdtablesize (); @@ -206,6 +236,9 @@ setrlimit (int resource, const struct rlimit *rlp) if (rlp->rlim_cur != RLIM_INFINITY) return setdtablesize (rlp->rlim_cur); break; + case RLIMIT_STACK: + __set_rlimit_stack (rlp); + break; default: set_errno (EINVAL); __leave; diff --git a/winsup/cygwin/thread.cc b/winsup/cygwin/thread.cc index 9320868f5..b92a80620 100644 --- a/winsup/cygwin/thread.cc +++ b/winsup/cygwin/thread.cc @@ -475,7 +475,7 @@ pthread::create (void *(*func) (void *), pthread_attr *newattr, mutex.lock (); /* stackaddr holds the uppermost stack address. See the comments in pthread_attr_setstack and pthread_attr_setstackaddr for a description. */ - ULONG stacksize = attr.stacksize ?: PTHREAD_DEFAULT_STACKSIZE; + ULONG stacksize = attr.stacksize ?: get_rlimit_stack (); PVOID stackaddr = attr.stackaddr ? ((caddr_t) attr.stackaddr - stacksize) : NULL; win32_obj_id = CygwinCreateThread (thread_init_wrapper, this, stackaddr, @@ -1093,7 +1093,7 @@ pthread::resume () pthread_attr::pthread_attr ():verifyable_object (PTHREAD_ATTR_MAGIC), joinable (PTHREAD_CREATE_JOINABLE), contentionscope (PTHREAD_SCOPE_PROCESS), inheritsched (PTHREAD_INHERIT_SCHED), stackaddr (NULL), stacksize (0), -guardsize (PTHREAD_DEFAULT_GUARDSIZE) +guardsize (wincap.def_guard_page_size ()) { schedparam.sched_priority = 0; } @@ -2330,7 +2330,7 @@ pthread_attr_getstacksize (const pthread_attr_t *attr, size_t *size) /* If the stacksize has not been set by the application, return the default stacksize. Note that this is different from what pthread_attr_getstack returns. 
*/ - *size = (*attr)->stacksize ?: PTHREAD_DEFAULT_STACKSIZE; + *size = (*attr)->stacksize ?: get_rlimit_stack (); return 0; } diff --git a/winsup/cygwin/thread.h b/winsup/cygwin/thread.h index 3650e9509..a6c735885 100644 --- a/winsup/cygwin/thread.h +++ b/winsup/cygwin/thread.h @@ -16,13 +16,8 @@ details. */ #define WRITE_LOCK 1 #define READ_LOCK 2 -/* Default is a 1 Megs stack with a 4K guardpage. The pthread stacksize - does not include the guardpage size, so we subtract the default guardpage - size. Additionally, the Windows stack handling disallows to use the last - two pages as guard page (tested on XP and W7). That results in a zone of - three pages which have to be subtract to get the actual stack size. */ -#define PTHREAD_DEFAULT_STACKSIZE (1024 * 1024 - 3 * wincap.page_size ()) -#define PTHREAD_DEFAULT_GUARDSIZE (wincap.page_size ()) +/* resource.cc */ +extern size_t get_rlimit_stack (void); #include #include diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml index be3e3892f..c52574ce4 100644 --- a/winsup/doc/new-features.xml +++ b/winsup/doc/new-features.xml @@ -8,6 +8,14 @@ + +Handle pthread stacksizes as in GLibc: Default to RLIMIT_STACK resource. +Allow to set RLIMIT_STACK via setrlimit. Default RLIMIT_STACK to value +from executable header as described on the MSDN website +Thread Stack Size +Default stacksize to 2 Megs in case RLIMIT_STACK is set to RLIM_INFINITY. + + First cut of an implementation to allow signal handlers running on an alternate signal stack.