diff --git a/winsup/utils/ChangeLog b/winsup/utils/ChangeLog index 6c6d5a518..ccecec9a1 100644 --- a/winsup/utils/ChangeLog +++ b/winsup/utils/ChangeLog @@ -1,3 +1,9 @@ +2000-10-18 DJ Delorie <dj@redhat.com> + + * ssp.c: new + * ssp.txt: new + * Makefile.in: build ssp.exe + Wed Oct 18 18:57:46 2000 Christopher Faylor <cgf@cygnus.com> * ps.cc (main): Add some extra space to Windows pid output for diff --git a/winsup/utils/Makefile.in b/winsup/utils/Makefile.in index 2ba532626..7ab7edb12 100644 --- a/winsup/utils/Makefile.in +++ b/winsup/utils/Makefile.in @@ -70,7 +70,7 @@ DUMPER_LDFLAGS:=$(ALL_LDFLAGS) $(DUMPER_LIB) PROGS:=mount$(EXEEXT) umount$(EXEEXT) ps$(EXEEXT) kill$(EXEEXT) \ mkpasswd$(EXEEXT) mkgroup$(EXEEXT) cygpath$(EXEEXT) cygcheck$(EXEEXT) \ passwd$(EXEEXT) getfacl$(EXEEXT) setfacl$(EXEEXT) strace$(EXEEXT) \ - regtool$(EXEEXT) + regtool$(EXEEXT) ssp$(EXEEXT) CLEAN_PROGS:=$(PROGS) ifdef build_dumper diff --git a/winsup/utils/ssp.c b/winsup/utils/ssp.c new file mode 100644 index 000000000..114ff3a57 --- /dev/null +++ b/winsup/utils/ssp.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2000, Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * A copy of the GNU General Public License can be found at + * http://www.gnu.org/ + * + * Written by DJ Delorie <dj@redhat.com> + * + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> +#include <time.h> +#include <ctype.h> +#include <windows.h> + +#ifdef __GNUC__ +const char *help_text = " +Usage: ssp [options] low_pc high_pc command... + +The SSP is a `single-step profiler' - it uses the debug API to +single-step your program, noting *everything* your program runs, not +just random places hit by random timer interrupts. You must specify +the range of EIP values to profile. For example, you could profile +just a function, or just a line of code, or the whole thing. +Use \"objdump -h\" to find the start of .text and the section following +it; this is what you want. + +There are many options to ssp. Since step-profiling makes your +program run about 1,000 times slower than normal, it's best to +understand all the options so that you can narrow down the parts +of your program you need to single-step. + +-v = verbose messages about debug events. + +-d, -e = disable/enable single-stepping by default. Use +OutputDebugString (\"ssp on\") to enable stepping, or \"ssp off\" to +disable it. Thus, you can profile a single function call or block. + +-t = trace every EIP value to a file TRACE.SSP. This gets big *fast*. +Use \"addr2line -C -f -s -e foo.exe < trace.ssp > lines.ssp\" and then +\"perl cvttrace\" to convert to symbolic traces. + +-tc = trace every EIP value to the console. *Lots* slower. + +-s = trace sub-threads too. Dangerous if you have race conditions. + +-dll = enable dll profiling. A chart of relative DLL usage is +produced after the run. + +Examples: + ssp 0x401000 0x403000 hello.exe + ssp -v -d -dll 0x401000 0x440000 foo.exe + +The output is a file \"gmon.out\" that can be read with gprof: + gprof -b foo.exe + +See ssp.txt in the cygwin sources for more information. +"; +#else +char *help_text = "Usage: get cygwin!\n"; +#endif + +#define KERNEL_ADDR 0x77000000 + +#define TRACE_SSP 0 + +#define VERBOSE 1 +#define TIMES 1000 + +/* from winsup/gmon.h */ +struct gmonhdr { + unsigned long lpc; /* base pc address of sample buffer */ + unsigned long hpc; /* max pc address of sampled buffer */ + int ncnt; /* size of sample buffer (plus this header) */ + int version; /* version number */ + int profrate; /* profiling clock rate */ + int spare[3]; /* reserved */ +}; +#define GMONVERSION 0x00051879 +#define HISTCOUNTER unsigned short + +typedef struct { + unsigned int base_address; + int pcount; + int scount; + char *name; +} DllInfo; + +typedef struct { + unsigned int address; + unsigned char real_byte; +} PendingBreakpoints; + +int low_pc=0, high_pc=0; +unsigned int last_pc=0, pc, last_sp=0, sp; +int total_cycles, count; +char *cmd_line; +HANDLE hProcess; +PROCESS_INFORMATION procinfo; +STARTUPINFO startup; +CONTEXT context; +HISTCOUNTER *hits=0; +struct gmonhdr hdr; +int running = 1, profiling = 1; +char dll_name[1024], *dll_ptr, *cp; +int eip; +unsigned opcode_count = 0; + +int stepping_enabled = 1; +int tracing_enabled = 0; +int trace_console = 0; +int trace_all_threads = 0; +int dll_counts = 0; +int verbose = 0; + +#define MAXTHREADS 100 +int active_thread_ids[MAXTHREADS]; +HANDLE active_threads[MAXTHREADS]; +int thread_step_flags[MAXTHREADS]; +int thread_return_address[MAXTHREADS]; +int num_active_threads = 0; +int suspended_count=0; + +#define MAXDLLS 100 +DllInfo dll_info[MAXDLLS]; +int num_dlls=0; + +#define MAXPENDS 100 +PendingBreakpoints pending_breakpoints[MAXPENDS]; +int num_breakpoints=0; + +static void +add_breakpoint (unsigned int address) +{ + int i; + DWORD rv; + static char int3[] = { 0xcc }; + for (i=0; i<num_breakpoints; i++) + { + if (pending_breakpoints[i].address == address) + return; + if (pending_breakpoints[i].address == 0) + break; + } + if (i == MAXPENDS) + return; + pending_breakpoints[i].address = address; + ReadProcessMemory (hProcess, + (void *)address, + &(pending_breakpoints[i].real_byte), + 1, &rv); + + WriteProcessMemory (hProcess, + (void *)address, + (LPVOID)int3, 1, &rv); + if (i >= num_breakpoints) + num_breakpoints = i+1; +} + +static int +remove_breakpoint (unsigned int address) +{ + int i; + DWORD rv; + for (i=0; i<num_breakpoints; i++) + { + if (pending_breakpoints[i].address == address) + { + pending_breakpoints[i].address = 0; + WriteProcessMemory (hProcess, + (void *)address, + &(pending_breakpoints[i].real_byte), + 1, &rv); + return 1; + } + } + return 0; +} + +static HANDLE +lookup_thread_id (int threadId, int *tix) +{ + int i; + for (i=0; i<num_active_threads; i++) + if (active_thread_ids[i] == threadId) + { + if (tix) *tix = i; + return active_threads[i]; + } + return 0; +} + +static void +set_step_threads (int threadId, int trace) +{ + int rv, tix; + HANDLE thread = lookup_thread_id (threadId, &tix); + + rv = GetThreadContext (thread, &context); + if (rv != -1) + { + thread_step_flags[tix] = trace; + if (trace) + context.EFlags |= 0x100; /* TRAP (single step) flag */ + else + context.EFlags &= ~0x100; /* TRAP (single step) flag */ + SetThreadContext (thread, &context); + } +} + +static void +set_steps () +{ + int i, s; + for (i=0; i<num_active_threads; i++) + { + GetThreadContext (active_threads[i], &context); + s = context.EFlags & 0x0100; + if (!s && thread_step_flags[i]) + { + set_step_threads (active_thread_ids[i], 1); + } + } +} + +static int +dll_sort (const void *va, const void *vb) +{ + DllInfo *a = (DllInfo *)va; + DllInfo *b = (DllInfo *)vb; + if (a->base_address < b->base_address) + return -1; + return 1; +} + +static char * +addr2dllname (unsigned int addr) +{ + int i; + for (i=num_dlls-1; i>=0; i--) + { + if (dll_info[i].base_address < addr) + { + return dll_info[i].name; + } + } + return (char *)""; +} + +static void +dump_registers (HANDLE thread) +{ + context.ContextFlags = CONTEXT_FULL; + GetThreadContext (thread, &context); + printf ("eax %08lx ebx %08lx ecx %08lx edx %08lx eip\n", + context.Eax, context.Ebx, context.Ecx, context.Edx); + printf ("esi %08lx edi %08lx ebp %08lx esp %08lx %08lx\n", + context.Esi, context.Esi, context.Ebp, context.Esp, context.Eip); +} + +typedef struct Edge { + struct Edge *next; + unsigned int from_pc; + unsigned int to_pc; + unsigned int count; +} Edge; + +Edge *edges[4096]; + +void +store_call_edge (unsigned int from_pc, unsigned int to_pc) +{ + Edge *e; + unsigned int h = ((from_pc + to_pc)>>4) & 4095; + for (e=edges[h]; e; e=e->next) + if (e->from_pc == from_pc && e->to_pc == to_pc) + break; + if (!e) + { + e = (Edge *)malloc (sizeof (Edge)); + e->next = edges[h]; + edges[h] = e; + e->from_pc = from_pc; + e->to_pc = to_pc; + e->count = 0; + } + e->count++; +} + +void +write_call_edges (FILE *f) +{ + int h; + Edge *e; + for (h=0; h<4096; h++) + for (e=edges[h]; e; e=e->next) + fwrite (&(e->from_pc), 1, 3*sizeof (unsigned int), f); +} + +char * +wide_strdup (char *cp) +{ + unsigned short *s = (unsigned short *)cp; + int len; + char *rv; + for (len=0; s[len]; len++); + rv = (char *)malloc (len+1); + for (len=0; s[len]; len++) + rv[len] = s[len]; + rv[len] = 0; + return rv; +} + +void +run_program (char *cmdline) +{ + FILE *tracefile = 0; + int tix, i; + HANDLE hThread; + char *string; + + memset (&startup, 0, sizeof (startup)); + startup.cb = sizeof (startup); + + if (!CreateProcess (0, cmd_line, 0, 0, 0, + CREATE_NEW_PROCESS_GROUP + | CREATE_SUSPENDED + | DEBUG_PROCESS + | DEBUG_ONLY_THIS_PROCESS, + 0, 0, &startup, &procinfo)) + { + fprintf (stderr, "Can't create process: error %ld\n", GetLastError ()); + exit (1); + } + + hProcess = procinfo.hProcess; +#if 0 + printf ("procinfo: %08x %08x %08x %08x\n", + hProcess, procinfo.hThread, procinfo.dwProcessId, procinfo.dwThreadId); +#endif + + active_threads[0] = procinfo.hThread; + active_thread_ids[0] = procinfo.dwThreadId; + thread_step_flags[0] = stepping_enabled; + num_active_threads = 1; + + dll_info[0].base_address = 0; + dll_info[0].pcount = 0; + dll_info[0].scount = 0; + dll_info[0].name = cmd_line; + num_dlls = 1; + + SetThreadPriority (procinfo.hThread, THREAD_PRIORITY_IDLE); + + context.ContextFlags = CONTEXT_FULL; + + ResumeThread (procinfo.hThread); + + total_cycles = 0; + + if (tracing_enabled) + { + tracefile = fopen ("trace.ssp", "w"); + if (!tracefile) + { + tracing_enabled = 0; + perror ("trace.ssp"); + } + } + + running = 1; + while (running) + { + int src, dest; + DWORD rv; + DEBUG_EVENT event; + int contv = DBG_CONTINUE; + + event.dwDebugEventCode = -1; + if (!WaitForDebugEvent (&event, INFINITE)) + { + printf ("idle...\n"); + } + + hThread = lookup_thread_id (event.dwThreadId, &tix); + +#if 0 + printf ("DE: %x/%d %d %d ", + hThread, tix, + event.dwDebugEventCode, num_active_threads); + for (src=0; src<num_active_threads; src++) + { + int sc = SuspendThread (active_threads[src]); + int rv = GetThreadContext (active_threads[src], &context); + ResumeThread (active_threads[src]); + printf (" [%x,%x,%x]", + active_threads[src], context.Eip, active_thread_ids[src]); + } + printf ("\n"); +#endif + + switch (event.dwDebugEventCode) + { + + case CREATE_PROCESS_DEBUG_EVENT: + break; + + case CREATE_THREAD_DEBUG_EVENT: + if (verbose) + printf ("create thread %08lx at %08x %s\n", + event.dwThreadId, + (int)event.u.CreateThread.lpStartAddress, + addr2dllname ((unsigned int)event.u.CreateThread.lpStartAddress)); + + active_thread_ids[num_active_threads] = event.dwThreadId; + active_threads[num_active_threads] = event.u.CreateThread.hThread; + thread_return_address[num_active_threads] = 0; + num_active_threads++; + + if (trace_all_threads && stepping_enabled) + { + thread_step_flags[num_active_threads-1] = stepping_enabled; + add_breakpoint ((int)event.u.CreateThread.lpStartAddress); + } + + break; + + case EXIT_THREAD_DEBUG_EVENT: + if (verbose) + printf ("exit thread %08lx, code=%ld\n", + event.dwThreadId, + event.u.ExitThread.dwExitCode); + + for (src=0, dest=0; src<num_active_threads; src++) + if (active_thread_ids[src] != event.dwThreadId) + { + active_thread_ids[dest] = active_thread_ids[src]; + active_threads[dest] = active_threads[src]; + dest++; + } + num_active_threads = dest; + break; + + case EXCEPTION_DEBUG_EVENT: + rv = GetThreadContext (hThread, &context); + switch (event.u.Exception.ExceptionRecord.ExceptionCode) + { + case STATUS_BREAKPOINT: + if (remove_breakpoint ((int)event.u.Exception.ExceptionRecord.ExceptionAddress)) + { + context.Eip --; + if (rv != -1) + SetThreadContext (hThread, &context); + if (ReadProcessMemory (hProcess, (void *)context.Esp, &rv, 4, &rv)) + thread_return_address[tix] = rv; + } + set_step_threads (event.dwThreadId, stepping_enabled); + case STATUS_SINGLE_STEP: + opcode_count++; + pc = (unsigned int)event.u.Exception.ExceptionRecord.ExceptionAddress; + sp = (unsigned int)context.Esp; + if (tracing_enabled) + fprintf (tracefile, "%08x %08lx\n", pc, event.dwThreadId); + if (trace_console) + { + printf ("%d %08x\n", tix, pc); + fflush (stdout); + } + + if (dll_counts) + { + int i; + for (i=num_dlls-1; i>=0; i--) + { + if (dll_info[i].base_address < context.Eip) + { + if (hThread == procinfo.hThread) + dll_info[i].pcount++; + else + dll_info[i].scount++; + break; + } + } + } + + if (pc < last_pc || pc > last_pc+10) + { + static int ncalls=0; + static int qq=0; + if (++qq % 100 == 0) + fprintf (stderr, " %08x %d %d \r", + pc, ncalls, opcode_count); + + if (sp == last_sp-4) + { + ncalls++; + store_call_edge (last_pc, pc); + if (last_pc < KERNEL_ADDR && pc > KERNEL_ADDR) + { + int retaddr; + DWORD rv; + ReadProcessMemory (hProcess, + (void *)sp, + (LPVOID)&(retaddr), + 4, &rv); +#if 0 + printf ("call last_pc = %08x pc = %08x rv = %08x\n", + last_pc, pc, retaddr); + /* experimental - try to skip kernel calls for speed */ + add_breakpoint (retaddr); + set_step_threads (event.dwThreadId, 0); +#endif + } + } + } + + total_cycles++; + last_sp = sp; + last_pc = pc; + if (pc >= low_pc && pc < high_pc) + hits[(pc - low_pc)/2] ++; + break; + default: + if (verbose) + { + printf ("exception %ld, ", event.u.Exception.dwFirstChance); + printf ("code: %lx flags: %lx\n", + event.u.Exception.ExceptionRecord.ExceptionCode, + event.u.Exception.ExceptionRecord.ExceptionFlags); + if (event.u.Exception.dwFirstChance == 1) + dump_registers (hThread); + } + contv = DBG_EXCEPTION_NOT_HANDLED; + running = 0; + break; + } + + if (rv != -1) + { + if (pc == thread_return_address[tix]) + { + if (context.EFlags & 0x100) + { + context.EFlags &= ~0x100; /* TRAP (single step) flag */ + SetThreadContext (hThread, &context); + } + } + else if (stepping_enabled) + { + if (!(context.EFlags & 0x100)) + { + context.EFlags |= 0x100; /* TRAP (single step) flag */ + SetThreadContext (hThread, &context); + } + } + } + break; + + case OUTPUT_DEBUG_STRING_EVENT: + string = (char *)malloc (event.u.DebugString.nDebugStringLength+1); + i = ReadProcessMemory (hProcess, + event.u.DebugString.lpDebugStringData, + (LPVOID)string, + event.u.DebugString.nDebugStringLength, + &rv); + if (!i) + { + printf ("error reading memory: %ld %ld\n", rv, GetLastError ()); + } + if (verbose) + printf ("ODS: %x/%d \"%s\"\n", + (int)hThread, tix, string); + + if (strcmp (string, "ssp on") == 0) + { + stepping_enabled = 1; + set_step_threads (event.dwThreadId, 1); + } + + if (strcmp (string, "ssp off") == 0) + { + stepping_enabled = 0; + set_step_threads (event.dwThreadId, 0); + } + + break; + + + case LOAD_DLL_DEBUG_EVENT: + if (verbose) + printf ("load dll %08x:", + (int)event.u.LoadDll.lpBaseOfDll); + + dll_ptr = (char *)"( u n k n o w n ) \0\0"; + if (event.u.LoadDll.lpImageName) + { + ReadProcessMemory (hProcess, + event.u.LoadDll.lpImageName, + (LPVOID)&src, + sizeof (src), + &rv); + if (src) + { + ReadProcessMemory (hProcess, + (void *)src, + (LPVOID)dll_name, + sizeof (dll_name), + &rv); + dll_name[rv] = 0; + dll_ptr = dll_name; + for (cp=dll_name; *cp; cp++) + { + if (*cp == '\\' || *cp == '/') + { + dll_ptr = cp+1; + } + *cp = tolower (*cp); + } + } + } + + + dll_info[num_dlls].base_address + = (unsigned int)event.u.LoadDll.lpBaseOfDll; + dll_info[num_dlls].pcount = 0; + dll_info[num_dlls].scount = 0; + dll_info[num_dlls].name = wide_strdup (dll_ptr); + if (verbose) + printf (" %s\n", dll_info[num_dlls].name); + num_dlls++; + qsort (dll_info, num_dlls, sizeof (DllInfo), dll_sort); + + break; + + case UNLOAD_DLL_DEBUG_EVENT: + if (verbose) + printf ("unload dll\n"); + break; + + case EXIT_PROCESS_DEBUG_EVENT: + if (verbose) + printf ("process %08lx %08lx exit %ld\n", + event.dwProcessId, event.dwThreadId, + event.u.ExitProcess.dwExitCode); + + running = 0; + break; + } + + set_steps (); + ContinueDebugEvent (event.dwProcessId, event.dwThreadId, contv); + } + + count = 0; + for (pc=low_pc; pc<high_pc; pc+=2) + { + count += hits[(pc - low_pc)/2]; + } + printf ("total cycles: %d, counted cycles: %d\n", total_cycles, count); + + if (tracing_enabled) + fclose (tracefile); + +} + +#define WHITE(s) while (isspace (*s)) s++ + +int +main (int argc, char **argv) +{ + int i, n; + int total_pcount, total_scount; + FILE *gmon; + + setbuf (stdout, 0); + + cmd_line = GetCommandLine (); + + /* strip off the program part */ + n = sscanf (cmd_line, "%*s%n", &i); + cmd_line += i; + WHITE (cmd_line); + + /* parse arguments. Ugly, but we need to preserve the tail of the line */ + while (*cmd_line == '-') + { + char opt[100]; + n = sscanf (cmd_line, " %s%n", opt, &i); + cmd_line += i; + WHITE (cmd_line); + + if (strcmp (opt, "-d") == 0) + { + printf ("stepping disabled; enable via OutputDebugString (\"ssp on\")\n"); + stepping_enabled = 0; + } + if (strcmp (opt, "-e") == 0) + { + printf ("stepping enabled; disable via OutputDebugString (\"ssp off\")\n"); + stepping_enabled = 1; + } + if (strcmp (opt, "-t") == 0) + { + printf ("tracing all $eip to trace.ssp\n"); + tracing_enabled = 1; + } + if (strcmp (opt, "-tc") == 0) + { + printf ("tracing *all* $eip to the console\n"); + trace_console = 1; + } + if (strcmp (opt, "-s") == 0) + { + printf ("tracing all sub-threads too, not just the main one\n"); + trace_all_threads = 1; + } + if (strcmp (opt, "-dll") == 0) + { + printf ("profiling dll usage\n"); + dll_counts = 1; + } + if (strcmp (opt, "-v") == 0) + { + printf ("verbose messages enabled\n"); + verbose = 1; + } + } + + n = sscanf (cmd_line, " %i %i %n", &low_pc, &high_pc, &i); + if (n < 2) + { + fputs (help_text, stderr); + exit (1); + } + cmd_line += i; + + if (low_pc > high_pc-8) + { + fprintf (stderr, "Hey, low_pc must be lower than high_pc\n"); + exit (1); + } + + hits = (HISTCOUNTER *)malloc (high_pc-low_pc+4); + memset (hits, 0, high_pc-low_pc+4); + + fprintf (stderr, "prun: [%08x,%08x] Running `%s'\n", + low_pc, high_pc, cmd_line); + + run_program (cmd_line); + + hdr.lpc = low_pc; + hdr.hpc = high_pc; + hdr.ncnt = high_pc-low_pc + sizeof (hdr); + hdr.version = GMONVERSION; + hdr.profrate = 100; + + gmon = fopen ("gmon.out", "wb"); + fwrite (&hdr, 1, sizeof (hdr), gmon); + fwrite (hits, 1, high_pc-low_pc, gmon); + write_call_edges (gmon); + fclose (gmon); + + if (dll_counts) + { + /* 1234567 123% 1234567 123% 12345678 xxxxxxxxxxx */ + printf (" Main-Thread Other-Thread BaseAddr DLL Name\n"); + + total_pcount = 0; + total_scount = 0; + for (i=0; i<num_dlls; i++) + { + total_pcount += dll_info[i].pcount; + total_scount += dll_info[i].scount; + } + + if (total_pcount == 0) total_pcount++; + if (total_scount == 0) total_scount++; + + for (i=0; i<num_dlls; i++) + if (dll_info[i].pcount || dll_info[i].scount) + { + printf ("%7d %3d%% %7d %3d%% %08x %s\n", + dll_info[i].pcount, + (dll_info[i].pcount*100)/opcode_count, + dll_info[i].scount, + (dll_info[i].scount*100)/opcode_count, + dll_info[i].base_address, + dll_info[i].name); + } + } + + exit (0); +} + diff --git a/winsup/utils/ssp.txt b/winsup/utils/ssp.txt new file mode 100644 index 000000000..513767f45 --- /dev/null +++ b/winsup/utils/ssp.txt @@ -0,0 +1,125 @@ + +SSP - The Single Step Profiler + +Original Author: DJ Delorie <dj@redhat.com> + +The SSP is a program that uses the Win32 debug API to run a program +one ASM instruction at a time. It records the location of each +instruction used, how many times that instruction is used, and all +function calls. The results are saved in a format that is usable by +the profiling program "gprof", although gprof will claim the values +are seconds, they really are instruction counts. More on that later. + +Because the SSP was originally designed to profile the cygwin DLL, it +does not automatically select a block of code to report statistics on. +You must specify the range of memory addresses to keep track of +manually, but it's not hard to figure out what to specify. Use the +"objdump" program to determine the bounds of the target's ".text" +section. Let's say we're profiling cygwin1.dll. Make sure you've +built it with debug symbols (else gprof won't run) and run objdump +like this: + + objdump -h cygwin1.dll + +It will print a report like this: + +cygwin1.dll: file format pei-i386 + +Sections: +Idx Name Size VMA LMA File off Algn + 0 .text 0007ea00 61001000 61001000 00000400 2**2 + CONTENTS, ALLOC, LOAD, READONLY, CODE, DATA + 1 .data 00008000 61080000 61080000 0007ee00 2**2 + CONTENTS, ALLOC, LOAD, DATA + . . . + +The only information we're concerned with are the VMA of the .text +section and the VMA of the section after it (sections are usually +contiguous; you can also add the Size to the VMA to get the end +address). In this case, the VMA is 0x61001000 and the ending address +is either 0x61080000 (start of .data method) or 0x0x6107fa00 (VMA+Size +method). + +There are two basic ways to use SSP - either profiling a whole +program, or selectively profiling parts of the program. + +To profile a whole program, just run ssp without options. By default, +it will step the whole program. Here's a simple example, using the +numbers above: + + ssp 0x61001000 0x61080000 hello.exe + +This will step the whole program. It will take at least 8 minutes on +a PII/300 (yes, really). When it's done, it will create a file called +"gmon.out". You can turn this data file into a readable report with +gprof: + + gprof -b cygwin1.dll + +The "-b" means "skip the help pages". You can omit this until you're +familiar with the report layout. The gprof documentation explains +a lot about this report, but ssp changes a few things. For example, +the first part of the report reports the amount of time spent in each +function, like this: + +Each sample counts as 0.01 seconds. + % cumulative self self total + time seconds seconds calls ms/call ms/call name + 10.02 231.22 72.43 46 1574.57 1574.57 strcspn + 7.95 288.70 57.48 130 442.15 442.15 strncasematch + +The "seconds" columns are really CPU opcodes, 1/100 second per opcode. +So, "231.22" above means 23,122 opcodes. The ms/call values are 10x +too big; 1574.57 means 157.457 opcodes per call. Similar adjustments +need to be made for the "self" and "children" columns in the second +part of the report. + +OK, so now we've got a huge report that took a long time to generate, +and we've identified a spot we want to work on optimizing. Let's say +it's the time() function. We can use SSP to selectively profile this +function by using OutputDebugString() to control SSP from within the +program. Here's a sample program: + + #include <windows.h> + main() + { + time_t t; + OutputDebugString("ssp on"); + time(&t); + OutputDebugString("ssp off"); + } + +Then, add the "-d" option to ssp to default to *disabling* profiling. +The program will run at full speed until the first OutputDebugString, +then step until the second. + + ssp -d 0x61001000 0x61080000 hello.exe + +You can then use gprof (as usual) to see the performance profile for +just that portion of the program's execution. + +OK, now for the other ssp options, and when to use them: + +"-v" - verbose. This prints messages about threads starting and +stopping, OutputDebugString calls, DLLs loading, etc. + +"-t" and "-tc" - tracing. With -t, *every* step's address is written +to the file "trace.ssp". This can be used to help debug functions, +since it can trace multiple threads. Clever use of scripts can match +addresses with disassembled opcodes if needed. Warning: creates +*huge* files, very quickly. "-tc" prints each address to the console, +useful for debugging key chunks of assembler. + +"-s" - subthreads. Usually, you only need to trace the main thread, +but sometimes you need to trace all threads, so this enables that. +It's also needed when you want to profile a function that only a +subthread calls. However, using OutputDebugString automatically +enables profiling on the thread that called it, not the main thread. + +"-dll" - dll profiling. Generates a pretty table of how much time was +spent in each dll the program used. No sense optimizing a function in +your program if most of the time is spent in the DLL. + +I usually use the -v, -s, and -dll options: + + ssp -v -s -dll -d 0x61001000 0x61080000 hello.exe