From cc55c579a361e3dd8bd4766530e1e6de1eb71664 Mon Sep 17 00:00:00 2001 From: DJ Delorie Date: Thu, 19 Oct 2000 02:13:52 +0000 Subject: [PATCH] * ssp.c: new * ssp.txt: new * Makefile.in: build ssp.exe --- winsup/utils/ChangeLog | 6 + winsup/utils/Makefile.in | 2 +- winsup/utils/ssp.c | 802 +++++++++++++++++++++++++++++++++++++++ winsup/utils/ssp.txt | 125 ++++++ 4 files changed, 934 insertions(+), 1 deletion(-) create mode 100644 winsup/utils/ssp.c create mode 100644 winsup/utils/ssp.txt diff --git a/winsup/utils/ChangeLog b/winsup/utils/ChangeLog index 6c6d5a518..ccecec9a1 100644 --- a/winsup/utils/ChangeLog +++ b/winsup/utils/ChangeLog @@ -1,3 +1,9 @@ +2000-10-18 DJ Delorie + + * ssp.c: new + * ssp.txt: new + * Makefile.in: build ssp.exe + Wed Oct 18 18:57:46 2000 Christopher Faylor * ps.cc (main): Add some extra space to Windows pid output for diff --git a/winsup/utils/Makefile.in b/winsup/utils/Makefile.in index 2ba532626..7ab7edb12 100644 --- a/winsup/utils/Makefile.in +++ b/winsup/utils/Makefile.in @@ -70,7 +70,7 @@ DUMPER_LDFLAGS:=$(ALL_LDFLAGS) $(DUMPER_LIB) PROGS:=mount$(EXEEXT) umount$(EXEEXT) ps$(EXEEXT) kill$(EXEEXT) \ mkpasswd$(EXEEXT) mkgroup$(EXEEXT) cygpath$(EXEEXT) cygcheck$(EXEEXT) \ passwd$(EXEEXT) getfacl$(EXEEXT) setfacl$(EXEEXT) strace$(EXEEXT) \ - regtool$(EXEEXT) + regtool$(EXEEXT) ssp$(EXEEXT) CLEAN_PROGS:=$(PROGS) ifdef build_dumper diff --git a/winsup/utils/ssp.c b/winsup/utils/ssp.c new file mode 100644 index 000000000..114ff3a57 --- /dev/null +++ b/winsup/utils/ssp.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2000, Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * A copy of the GNU General Public License can be found at + * http://www.gnu.org/ + * + * Written by DJ Delorie + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __GNUC__ +const char *help_text = " +Usage: ssp [options] low_pc high_pc command... + +The SSP is a `single-step profiler' - it uses the debug API to +single-step your program, noting *everything* your program runs, not +just random places hit by random timer interrupts. You must specify +the range of EIP values to profile. For example, you could profile +just a function, or just a line of code, or the whole thing. +Use \"objdump -h\" to find the start of .text and the section following +it; this is what you want. + +There are many options to ssp. Since step-profiling makes your +program run about 1,000 times slower than normal, it's best to +understand all the options so that you can narrow down the parts +of your program you need to single-step. + +-v = verbose messages about debug events. + +-d, -e = disable/enable single-stepping by default. Use +OutputDebugString (\"ssp on\") to enable stepping, or \"ssp off\" to +disable it. Thus, you can profile a single function call or block. + +-t = trace every EIP value to a file TRACE.SSP. This gets big *fast*. +Use \"addr2line -C -f -s -e foo.exe < trace.ssp > lines.ssp\" and then +\"perl cvttrace\" to convert to symbolic traces. + +-tc = trace every EIP value to the console. *Lots* slower. + +-s = trace sub-threads too. Dangerous if you have race conditions. + +-dll = enable dll profiling. A chart of relative DLL usage is +produced after the run. + +Examples: + ssp 0x401000 0x403000 hello.exe + ssp -v -d -dll 0x401000 0x440000 foo.exe + +The output is a file \"gmon.out\" that can be read with gprof: + gprof -b foo.exe + +See ssp.txt in the cygwin sources for more information. +"; +#else +char *help_text = "Usage: get cygwin!\n"; +#endif + +#define KERNEL_ADDR 0x77000000 + +#define TRACE_SSP 0 + +#define VERBOSE 1 +#define TIMES 1000 + +/* from winsup/gmon.h */ +struct gmonhdr { + unsigned long lpc; /* base pc address of sample buffer */ + unsigned long hpc; /* max pc address of sampled buffer */ + int ncnt; /* size of sample buffer (plus this header) */ + int version; /* version number */ + int profrate; /* profiling clock rate */ + int spare[3]; /* reserved */ +}; +#define GMONVERSION 0x00051879 +#define HISTCOUNTER unsigned short + +typedef struct { + unsigned int base_address; + int pcount; + int scount; + char *name; +} DllInfo; + +typedef struct { + unsigned int address; + unsigned char real_byte; +} PendingBreakpoints; + +int low_pc=0, high_pc=0; +unsigned int last_pc=0, pc, last_sp=0, sp; +int total_cycles, count; +char *cmd_line; +HANDLE hProcess; +PROCESS_INFORMATION procinfo; +STARTUPINFO startup; +CONTEXT context; +HISTCOUNTER *hits=0; +struct gmonhdr hdr; +int running = 1, profiling = 1; +char dll_name[1024], *dll_ptr, *cp; +int eip; +unsigned opcode_count = 0; + +int stepping_enabled = 1; +int tracing_enabled = 0; +int trace_console = 0; +int trace_all_threads = 0; +int dll_counts = 0; +int verbose = 0; + +#define MAXTHREADS 100 +int active_thread_ids[MAXTHREADS]; +HANDLE active_threads[MAXTHREADS]; +int thread_step_flags[MAXTHREADS]; +int thread_return_address[MAXTHREADS]; +int num_active_threads = 0; +int suspended_count=0; + +#define MAXDLLS 100 +DllInfo dll_info[MAXDLLS]; +int num_dlls=0; + +#define MAXPENDS 100 +PendingBreakpoints pending_breakpoints[MAXPENDS]; +int num_breakpoints=0; + +static void +add_breakpoint (unsigned int address) +{ + int i; + DWORD rv; + static char int3[] = { 0xcc }; + for (i=0; i= num_breakpoints) + num_breakpoints = i+1; +} + +static int +remove_breakpoint (unsigned int address) +{ + int i; + DWORD rv; + for (i=0; ibase_address < b->base_address) + return -1; + return 1; +} + +static char * +addr2dllname (unsigned int addr) +{ + int i; + for (i=num_dlls-1; i>=0; i--) + { + if (dll_info[i].base_address < addr) + { + return dll_info[i].name; + } + } + return (char *)""; +} + +static void +dump_registers (HANDLE thread) +{ + context.ContextFlags = CONTEXT_FULL; + GetThreadContext (thread, &context); + printf ("eax %08lx ebx %08lx ecx %08lx edx %08lx eip\n", + context.Eax, context.Ebx, context.Ecx, context.Edx); + printf ("esi %08lx edi %08lx ebp %08lx esp %08lx %08lx\n", + context.Esi, context.Esi, context.Ebp, context.Esp, context.Eip); +} + +typedef struct Edge { + struct Edge *next; + unsigned int from_pc; + unsigned int to_pc; + unsigned int count; +} Edge; + +Edge *edges[4096]; + +void +store_call_edge (unsigned int from_pc, unsigned int to_pc) +{ + Edge *e; + unsigned int h = ((from_pc + to_pc)>>4) & 4095; + for (e=edges[h]; e; e=e->next) + if (e->from_pc == from_pc && e->to_pc == to_pc) + break; + if (!e) + { + e = (Edge *)malloc (sizeof (Edge)); + e->next = edges[h]; + edges[h] = e; + e->from_pc = from_pc; + e->to_pc = to_pc; + e->count = 0; + } + e->count++; +} + +void +write_call_edges (FILE *f) +{ + int h; + Edge *e; + for (h=0; h<4096; h++) + for (e=edges[h]; e; e=e->next) + fwrite (&(e->from_pc), 1, 3*sizeof (unsigned int), f); +} + +char * +wide_strdup (char *cp) +{ + unsigned short *s = (unsigned short *)cp; + int len; + char *rv; + for (len=0; s[len]; len++); + rv = (char *)malloc (len+1); + for (len=0; s[len]; len++) + rv[len] = s[len]; + rv[len] = 0; + return rv; +} + +void +run_program (char *cmdline) +{ + FILE *tracefile = 0; + int tix, i; + HANDLE hThread; + char *string; + + memset (&startup, 0, sizeof (startup)); + startup.cb = sizeof (startup); + + if (!CreateProcess (0, cmd_line, 0, 0, 0, + CREATE_NEW_PROCESS_GROUP + | CREATE_SUSPENDED + | DEBUG_PROCESS + | DEBUG_ONLY_THIS_PROCESS, + 0, 0, &startup, &procinfo)) + { + fprintf (stderr, "Can't create process: error %ld\n", GetLastError ()); + exit (1); + } + + hProcess = procinfo.hProcess; +#if 0 + printf ("procinfo: %08x %08x %08x %08x\n", + hProcess, procinfo.hThread, procinfo.dwProcessId, procinfo.dwThreadId); +#endif + + active_threads[0] = procinfo.hThread; + active_thread_ids[0] = procinfo.dwThreadId; + thread_step_flags[0] = stepping_enabled; + num_active_threads = 1; + + dll_info[0].base_address = 0; + dll_info[0].pcount = 0; + dll_info[0].scount = 0; + dll_info[0].name = cmd_line; + num_dlls = 1; + + SetThreadPriority (procinfo.hThread, THREAD_PRIORITY_IDLE); + + context.ContextFlags = CONTEXT_FULL; + + ResumeThread (procinfo.hThread); + + total_cycles = 0; + + if (tracing_enabled) + { + tracefile = fopen ("trace.ssp", "w"); + if (!tracefile) + { + tracing_enabled = 0; + perror ("trace.ssp"); + } + } + + running = 1; + while (running) + { + int src, dest; + DWORD rv; + DEBUG_EVENT event; + int contv = DBG_CONTINUE; + + event.dwDebugEventCode = -1; + if (!WaitForDebugEvent (&event, INFINITE)) + { + printf ("idle...\n"); + } + + hThread = lookup_thread_id (event.dwThreadId, &tix); + +#if 0 + printf ("DE: %x/%d %d %d ", + hThread, tix, + event.dwDebugEventCode, num_active_threads); + for (src=0; src=0; i--) + { + if (dll_info[i].base_address < context.Eip) + { + if (hThread == procinfo.hThread) + dll_info[i].pcount++; + else + dll_info[i].scount++; + break; + } + } + } + + if (pc < last_pc || pc > last_pc+10) + { + static int ncalls=0; + static int qq=0; + if (++qq % 100 == 0) + fprintf (stderr, " %08x %d %d \r", + pc, ncalls, opcode_count); + + if (sp == last_sp-4) + { + ncalls++; + store_call_edge (last_pc, pc); + if (last_pc < KERNEL_ADDR && pc > KERNEL_ADDR) + { + int retaddr; + DWORD rv; + ReadProcessMemory (hProcess, + (void *)sp, + (LPVOID)&(retaddr), + 4, &rv); +#if 0 + printf ("call last_pc = %08x pc = %08x rv = %08x\n", + last_pc, pc, retaddr); + /* experimental - try to skip kernel calls for speed */ + add_breakpoint (retaddr); + set_step_threads (event.dwThreadId, 0); +#endif + } + } + } + + total_cycles++; + last_sp = sp; + last_pc = pc; + if (pc >= low_pc && pc < high_pc) + hits[(pc - low_pc)/2] ++; + break; + default: + if (verbose) + { + printf ("exception %ld, ", event.u.Exception.dwFirstChance); + printf ("code: %lx flags: %lx\n", + event.u.Exception.ExceptionRecord.ExceptionCode, + event.u.Exception.ExceptionRecord.ExceptionFlags); + if (event.u.Exception.dwFirstChance == 1) + dump_registers (hThread); + } + contv = DBG_EXCEPTION_NOT_HANDLED; + running = 0; + break; + } + + if (rv != -1) + { + if (pc == thread_return_address[tix]) + { + if (context.EFlags & 0x100) + { + context.EFlags &= ~0x100; /* TRAP (single step) flag */ + SetThreadContext (hThread, &context); + } + } + else if (stepping_enabled) + { + if (!(context.EFlags & 0x100)) + { + context.EFlags |= 0x100; /* TRAP (single step) flag */ + SetThreadContext (hThread, &context); + } + } + } + break; + + case OUTPUT_DEBUG_STRING_EVENT: + string = (char *)malloc (event.u.DebugString.nDebugStringLength+1); + i = ReadProcessMemory (hProcess, + event.u.DebugString.lpDebugStringData, + (LPVOID)string, + event.u.DebugString.nDebugStringLength, + &rv); + if (!i) + { + printf ("error reading memory: %ld %ld\n", rv, GetLastError ()); + } + if (verbose) + printf ("ODS: %x/%d \"%s\"\n", + (int)hThread, tix, string); + + if (strcmp (string, "ssp on") == 0) + { + stepping_enabled = 1; + set_step_threads (event.dwThreadId, 1); + } + + if (strcmp (string, "ssp off") == 0) + { + stepping_enabled = 0; + set_step_threads (event.dwThreadId, 0); + } + + break; + + + case LOAD_DLL_DEBUG_EVENT: + if (verbose) + printf ("load dll %08x:", + (int)event.u.LoadDll.lpBaseOfDll); + + dll_ptr = (char *)"( u n k n o w n ) \0\0"; + if (event.u.LoadDll.lpImageName) + { + ReadProcessMemory (hProcess, + event.u.LoadDll.lpImageName, + (LPVOID)&src, + sizeof (src), + &rv); + if (src) + { + ReadProcessMemory (hProcess, + (void *)src, + (LPVOID)dll_name, + sizeof (dll_name), + &rv); + dll_name[rv] = 0; + dll_ptr = dll_name; + for (cp=dll_name; *cp; cp++) + { + if (*cp == '\\' || *cp == '/') + { + dll_ptr = cp+1; + } + *cp = tolower (*cp); + } + } + } + + + dll_info[num_dlls].base_address + = (unsigned int)event.u.LoadDll.lpBaseOfDll; + dll_info[num_dlls].pcount = 0; + dll_info[num_dlls].scount = 0; + dll_info[num_dlls].name = wide_strdup (dll_ptr); + if (verbose) + printf (" %s\n", dll_info[num_dlls].name); + num_dlls++; + qsort (dll_info, num_dlls, sizeof (DllInfo), dll_sort); + + break; + + case UNLOAD_DLL_DEBUG_EVENT: + if (verbose) + printf ("unload dll\n"); + break; + + case EXIT_PROCESS_DEBUG_EVENT: + if (verbose) + printf ("process %08lx %08lx exit %ld\n", + event.dwProcessId, event.dwThreadId, + event.u.ExitProcess.dwExitCode); + + running = 0; + break; + } + + set_steps (); + ContinueDebugEvent (event.dwProcessId, event.dwThreadId, contv); + } + + count = 0; + for (pc=low_pc; pc high_pc-8) + { + fprintf (stderr, "Hey, low_pc must be lower than high_pc\n"); + exit (1); + } + + hits = (HISTCOUNTER *)malloc (high_pc-low_pc+4); + memset (hits, 0, high_pc-low_pc+4); + + fprintf (stderr, "prun: [%08x,%08x] Running `%s'\n", + low_pc, high_pc, cmd_line); + + run_program (cmd_line); + + hdr.lpc = low_pc; + hdr.hpc = high_pc; + hdr.ncnt = high_pc-low_pc + sizeof (hdr); + hdr.version = GMONVERSION; + hdr.profrate = 100; + + gmon = fopen ("gmon.out", "wb"); + fwrite (&hdr, 1, sizeof (hdr), gmon); + fwrite (hits, 1, high_pc-low_pc, gmon); + write_call_edges (gmon); + fclose (gmon); + + if (dll_counts) + { + /* 1234567 123% 1234567 123% 12345678 xxxxxxxxxxx */ + printf (" Main-Thread Other-Thread BaseAddr DLL Name\n"); + + total_pcount = 0; + total_scount = 0; + for (i=0; i + +The SSP is a program that uses the Win32 debug API to run a program +one ASM instruction at a time. It records the location of each +instruction used, how many times that instruction is used, and all +function calls. The results are saved in a format that is usable by +the profiling program "gprof", although gprof will claim the values +are seconds, they really are instruction counts. More on that later. + +Because the SSP was originally designed to profile the cygwin DLL, it +does not automatically select a block of code to report statistics on. +You must specify the range of memory addresses to keep track of +manually, but it's not hard to figure out what to specify. Use the +"objdump" program to determine the bounds of the target's ".text" +section. Let's say we're profiling cygwin1.dll. Make sure you've +built it with debug symbols (else gprof won't run) and run objdump +like this: + + objdump -h cygwin1.dll + +It will print a report like this: + +cygwin1.dll: file format pei-i386 + +Sections: +Idx Name Size VMA LMA File off Algn + 0 .text 0007ea00 61001000 61001000 00000400 2**2 + CONTENTS, ALLOC, LOAD, READONLY, CODE, DATA + 1 .data 00008000 61080000 61080000 0007ee00 2**2 + CONTENTS, ALLOC, LOAD, DATA + . . . + +The only information we're concerned with are the VMA of the .text +section and the VMA of the section after it (sections are usually +contiguous; you can also add the Size to the VMA to get the end +address). In this case, the VMA is 0x61001000 and the ending address +is either 0x61080000 (start of .data method) or 0x0x6107fa00 (VMA+Size +method). + +There are two basic ways to use SSP - either profiling a whole +program, or selectively profiling parts of the program. + +To profile a whole program, just run ssp without options. By default, +it will step the whole program. Here's a simple example, using the +numbers above: + + ssp 0x61001000 0x61080000 hello.exe + +This will step the whole program. It will take at least 8 minutes on +a PII/300 (yes, really). When it's done, it will create a file called +"gmon.out". You can turn this data file into a readable report with +gprof: + + gprof -b cygwin1.dll + +The "-b" means "skip the help pages". You can omit this until you're +familiar with the report layout. The gprof documentation explains +a lot about this report, but ssp changes a few things. For example, +the first part of the report reports the amount of time spent in each +function, like this: + +Each sample counts as 0.01 seconds. + % cumulative self self total + time seconds seconds calls ms/call ms/call name + 10.02 231.22 72.43 46 1574.57 1574.57 strcspn + 7.95 288.70 57.48 130 442.15 442.15 strncasematch + +The "seconds" columns are really CPU opcodes, 1/100 second per opcode. +So, "231.22" above means 23,122 opcodes. The ms/call values are 10x +too big; 1574.57 means 157.457 opcodes per call. Similar adjustments +need to be made for the "self" and "children" columns in the second +part of the report. + +OK, so now we've got a huge report that took a long time to generate, +and we've identified a spot we want to work on optimizing. Let's say +it's the time() function. We can use SSP to selectively profile this +function by using OutputDebugString() to control SSP from within the +program. Here's a sample program: + + #include + main() + { + time_t t; + OutputDebugString("ssp on"); + time(&t); + OutputDebugString("ssp off"); + } + +Then, add the "-d" option to ssp to default to *disabling* profiling. +The program will run at full speed until the first OutputDebugString, +then step until the second. + + ssp -d 0x61001000 0x61080000 hello.exe + +You can then use gprof (as usual) to see the performance profile for +just that portion of the program's execution. + +OK, now for the other ssp options, and when to use them: + +"-v" - verbose. This prints messages about threads starting and +stopping, OutputDebugString calls, DLLs loading, etc. + +"-t" and "-tc" - tracing. With -t, *every* step's address is written +to the file "trace.ssp". This can be used to help debug functions, +since it can trace multiple threads. Clever use of scripts can match +addresses with disassembled opcodes if needed. Warning: creates +*huge* files, very quickly. "-tc" prints each address to the console, +useful for debugging key chunks of assembler. + +"-s" - subthreads. Usually, you only need to trace the main thread, +but sometimes you need to trace all threads, so this enables that. +It's also needed when you want to profile a function that only a +subthread calls. However, using OutputDebugString automatically +enables profiling on the thread that called it, not the main thread. + +"-dll" - dll profiling. Generates a pretty table of how much time was +spent in each dll the program used. No sense optimizing a function in +your program if most of the time is spent in the DLL. + +I usually use the -v, -s, and -dll options: + + ssp -v -s -dll -d 0x61001000 0x61080000 hello.exe