linuxKludges.C

Go to the documentation of this file.
00001 /*
00002  * See the dyninst/COPYRIGHT file for copyright information.
00003  * 
00004  * We provide the Paradyn Tools (below described as "Paradyn")
00005  * on an AS IS basis, and do not warrant its validity or performance.
00006  * We reserve the right to update, modify, or discontinue this
00007  * software at any time.  We shall have no obligation to supply such
00008  * updates or modifications or any other form of support to you.
00009  * 
00010  * By your use of Paradyn, you understand and agree that we (or any
00011  * other person or entity with proprietary rights in Paradyn) are
00012  * under no obligation to provide either maintenance services,
00013  * update services, notices of latent defects, or correction of
00014  * defects for Paradyn.
00015  * 
00016  * This library is free software; you can redistribute it and/or
00017  * modify it under the terms of the GNU Lesser General Public
00018  * License as published by the Free Software Foundation; either
00019  * version 2.1 of the License, or (at your option) any later version.
00020  * 
00021  * This library is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00024  * Lesser General Public License for more details.
00025  * 
00026  * You should have received a copy of the GNU Lesser General Public
00027  * License along with this library; if not, write to the Free Software
00028  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00029  */
00030 
00031 #include "common/h/headers.h"
00032 #include "common/h/parseauxv.h"
00033 #include "common/h/linuxKludges.h"
00034 #include "common/h/Types.h"
00035 
00036 #include <elf.h>
00037 
00038 #include <vector>
00039 #include <sys/types.h>
00040 #include <unistd.h>
00041 #include <dirent.h>
00042 #include <string.h>
00043 
00044 typedef int (*intKludge)(); // pointer to a function that returns int; not referenced in the visible part of this file
00045 
/**
 * Wrapper around getopt() that prepends '+' to the option string before
 * delegating to the C library.
 *
 * @param argc      argument count, passed straight to getopt()
 * @param argv      argument vector, passed straight to getopt()
 * @param optstring caller's option specification (must be non-NULL)
 * @return whatever getopt() returns for the prefixed option string
 */
int P_getopt(int argc, char *argv[], const char *optstring)
{
  /* On linux we prepend a + character */
  const size_t optlen = strlen(optstring);

  // Heap-backed storage instead of a variable-length array: VLAs are not
  // standard C++ and put an unbounded, caller-controlled size on the stack.
  std::vector<char> newopt;
  newopt.reserve(optlen + 2);
  newopt.push_back('+');
  // Copy optlen + 1 bytes so the terminating NUL comes along.
  newopt.insert(newopt.end(), optstring, optstring + optlen + 1);

  return getopt(argc, argv, &newopt[0]);
}
00054 
00055 int P_copy(const char *from, const char *to) {
00056     int from_fd = P_open(from, O_RDONLY, 0);
00057     if (from_fd == -1)  {
00058         perror("Opening from file in copy"); 
00059         return -1;
00060     }
00061     int to_fd = P_open(to, O_WRONLY | O_APPEND | O_CREAT | O_TRUNC, 0);
00062     if (to_fd == -1) {
00063         perror("Opening to file in copy");
00064         close(from_fd);
00065         return -1;
00066     }
00067 
00068     char buffer[1048576];
00069     while(true) {
00070         int amount = read(from_fd, buffer, 1048576);
00071         if (amount == -1) {
00072             perror("Reading in file copy");
00073             return -1;
00074         }
00075         write(to_fd, buffer, amount);
00076         if (amount < 1048576) break;
00077     }
00078     close(to_fd);
00079     close(from_fd);
00080     return 0;
00081 }
00082 
00083 
/**
 * Divide by 1000, truncating toward zero.
 *
 * This used to be a shift/add fixed-point approximation (multiply by
 * ~2^30/1000, then shift right by 30).  That version subtracted `in >> 2`
 * where the documented decomposition called for `in << 1`, so e.g.
 * 999,999 produced 1000; and even with the right constant the rounded
 * multiplier is off by one just below multiples of 1000 for large inputs,
 * and `in << 20` overflows once the input needs more than 44 bits.
 *
 * A plain division by a constant is exact for the whole 64-bit range.
 *
 * @param in value to divide
 * @return in / 1000
 */
unsigned long long PDYN_div1000(unsigned long long in) {
   return in / 1000ULL;
}
00117 
/**
 * Divide by 1,000,000, truncating toward zero.
 *
 * The previous shift/add version multiplied by 1074 and shifted right by
 * 30.  1074 is a rounded-up stand-in for 2^30 / 1,000,000 (~1073.74), which
 * makes the result about 0.024% too large: 999,800 already yielded 1.  It
 * also overflowed for inputs wider than roughly 53 bits.
 *
 * A plain division by a constant is exact for the whole 64-bit range.
 *
 * @param in value to divide
 * @return in / 1,000,000
 */
unsigned long long PDYN_divMillion(unsigned long long in) {
   return in / 1000000ULL;
}
00149 
/**
 * Multiply by 1,000,000 using only shifts and subtractions.
 *
 * 1,000,000 = 125 * 125 * 64, and 125x = 128x - 2x - x, so the product is
 * built in three steps.  Arithmetic is unsigned and wraps modulo 2^64.
 *
 * @param in value to scale
 * @return in * 1,000,000 (modulo 2^64)
 */
unsigned long long PDYN_mulMillion(unsigned long long in) {
   /* first factor of 125: 128x - 2x - x */
   const unsigned long long times125 = (in << 7) - (in << 1) - in;

   /* second factor of 125, giving 15625x */
   const unsigned long long times15625 =
      (times125 << 7) - (times125 << 1) - times125;

   /* remaining factor of 64 */
   return times15625 << 6;
}
00176 
00177 #if defined(cap_gnu_demangler)
00178 #include <cxxabi.h>
00179 using namespace __cxxabiv1;
00180 #endif
00181 
00182 char * P_cplus_demangle( const char * symbol, bool nativeCompiler,
00183                 bool includeTypes ) 
00184 {
00185    int opts = 0;
00186    opts |= includeTypes ? DMGL_PARAMS | DMGL_ANSI : 0;
00187    //   [ pgcc/CC are the "native" compilers on Linux. Go figure. ]
00188    // pgCC's mangling scheme most closely resembles that of the Annotated
00189    // C++ Reference Manual, only with "some exceptions" (to quote the PGI
00190    // documentation). I guess we'll demangle names with "some exceptions".
00191    opts |= nativeCompiler ? DMGL_ARM : 0;
00192 
00193 #if defined(cap_gnu_demangler)
00194    int status;
00195    char *demangled = __cxa_demangle(symbol, NULL, NULL, &status);
00196    if (status == -1) {
00197       //Memory allocation failure.
00198       return NULL;
00199    }
00200    if (status == -2) {
00201       //Not a C++ name
00202       return NULL;
00203    }
00204    assert(status == 0); //Success
00205 #else
00206    char * demangled = cplus_demangle( const_cast< char *>(symbol), opts);
00207 #endif
00208    if( demangled == NULL ) { return NULL; }
00209 
00210    if( ! includeTypes ) {
00211         /* de-demangling never increases the length */   
00212         char * dedemangled = strdup( demangled );   
00213         assert( dedemangled != NULL );
00214         dedemangle( demangled, dedemangled );
00215         assert( dedemangled != NULL );
00216 
00217         free( demangled );
00218         return dedemangled;
00219         }
00220 
00221    return demangled;
00222 } /* end P_cplus_demangle() */
00223 
00224 bool PtraceBulkRead(Address inTraced, unsigned size, const void *inSelf, int pid)
00225 {
00226    const unsigned char *ap = (const unsigned char*) inTraced; 
00227    unsigned char *dp = (unsigned char *) const_cast<void *>(inSelf);
00228    Address w = 0x0;               /* ptrace I/O buffer */
00229    int len = sizeof(void *);
00230    unsigned cnt;
00231    
00232    if (0 == size) {
00233       return true;
00234    }
00235 
00236    cnt = inTraced % len;
00237    if (cnt) {
00238       /* Start of request is not aligned. */
00239       unsigned char *p = (unsigned char*) &w;
00240       
00241       /* Read the segment containing the unaligned portion, and
00242          copy what was requested to DP. */
00243       errno = 0;
00244       w = P_ptrace(PTRACE_PEEKTEXT, pid, (Address) (ap-cnt), w, len);
00245       if (errno) {
00246          return false;
00247       }
00248       for (unsigned i = 0; i < len-cnt && i < size; i++)
00249          dp[i] = p[cnt+i];
00250       
00251       if (len-cnt >= size) {
00252          return true; /* done */
00253       }
00254       
00255       dp += len-cnt;
00256       ap += len-cnt;
00257       size -= len-cnt;
00258    }
00259    /* Copy aligned portion */
00260    while (size >= (u_int)len) {
00261       errno = 0;
00262       w = P_ptrace(PTRACE_PEEKTEXT, pid, (Address) ap, 0, len);
00263       if (errno) {
00264          return false;
00265       }
00266       memcpy(dp, &w, len);
00267       dp += len;
00268       ap += len;
00269       size -= len;
00270    }
00271    
00272    if (size > 0) {
00273       /* Some unaligned data remains */
00274       unsigned char *p = (unsigned char *) &w;
00275       
00276       /* Read the segment containing the unaligned portion, and
00277          copy what was requested to DP. */
00278       errno = 0;
00279       w = P_ptrace(PTRACE_PEEKTEXT, pid, (Address) ap, 0, len);
00280       if (errno) {
00281          return false;
00282       }
00283       for (unsigned i = 0; i < size; i++)
00284          dp[i] = p[i];
00285    }
00286    return true;
00287 
00288 }
00289 
00290 bool PtraceBulkWrite(Dyninst::Address inTraced, unsigned nbytes, 
00291                      const void *inSelf, int pid)
00292 {
00293    unsigned char *ap = (unsigned char*) inTraced;
00294    const unsigned char *dp = (const unsigned char*) inSelf;
00295    Address w = 0x0;               /* ptrace I/O buffer */
00296    int len = sizeof(Address); /* address alignment of ptrace I/O requests */
00297    unsigned cnt;
00298    
00299    if (0 == nbytes) {
00300       return true;
00301    }
00302    
00303    if ((cnt = ((Address)ap) % len)) {
00304       /* Start of request is not aligned. */
00305       unsigned char *p = (unsigned char*) &w;
00306       
00307       /* Read the segment containing the unaligned portion, edit
00308          in the data from DP, and write the segment back. */
00309       errno = 0;
00310       w = P_ptrace(PTRACE_PEEKTEXT, pid, (Address) (ap-cnt), 0);
00311 
00312       if (errno) {
00313          return false;
00314       }
00315 
00316       for (unsigned i = 0; i < len-cnt && i < nbytes; i++)
00317          p[cnt+i] = dp[i];
00318       
00319       if (0 > P_ptrace(PTRACE_POKETEXT, pid, (Address) (ap-cnt), w)) {
00320          return false;
00321       }
00322 
00323       if (len-cnt >= nbytes) {
00324          return true; /* done */
00325       }
00326 
00327       dp += len-cnt;
00328       ap += len-cnt;
00329       nbytes -= len-cnt;
00330    }
00331    
00332    /* Copy aligned portion */
00333    while (nbytes >= (u_int)len) {
00334       assert(0 == ((Address)ap) % len);
00335       memcpy(&w, dp, len);
00336       int retval =  P_ptrace(PTRACE_POKETEXT, pid, (Address) ap, w);
00337       if (retval < 0) {
00338          return false;
00339       }
00340 
00341       // Check...
00342       dp += len;
00343       ap += len;
00344       nbytes -= len;
00345    }
00346 
00347    if (nbytes > 0) {
00348       /* Some unaligned data remains */
00349       unsigned char *p = (unsigned char *) &w;
00350 
00351       /* Read the segment containing the unaligned portion, edit
00352          in the data from DP, and write it back. */
00353       errno = 0;
00354       w = P_ptrace(PTRACE_PEEKTEXT, pid, (Address) ap, 0);
00355 
00356       if (errno) {
00357          return false;
00358       }
00359 
00360 
00361       for (unsigned i = 0; i < nbytes; i++)
00362          p[i] = dp[i];
00363 
00364       if (0 > P_ptrace(PTRACE_POKETEXT, pid, (Address) ap, w)) {
00365          return false;
00366       }
00367    }
00368    return true;
00369 }
00370 
00371 // These constants are not defined in all versions of elf.h; each fallback below only takes effect when the header did not already define the name.
00372 #ifndef AT_BASE
00373 #define AT_BASE 7 /* readAuxvInfo() stores this entry's value in interpreter_base */
00374 #endif
00375 #ifndef AT_NULL
00376 #define AT_NULL 0 /* entry type that ends the auxv list; stops the parse loop in readAuxvInfo() */
00377 #endif
00378 #ifndef AT_SYSINFO
00379 #define AT_SYSINFO 32 /* readAuxvInfo() records this entry's value as the vsyscall text address */
00380 #endif
00381 #ifndef AT_SYSINFO_EHDR
00382 #define AT_SYSINFO_EHDR 33 /* readAuxvInfo() uses this entry's value as its first guess for the vsyscall page start */
00383 #endif
00384 
00385 static bool couldBeVsyscallPage(map_entries *entry, bool strict, Address) {
00386    if (strict) {
00387        if (entry->prems != PREMS_PRIVATE)
00388          return false;
00389       if (entry->path[0] != '\0')
00390          return false;
00391    }
00392    if (entry->offset != 0)
00393       return false;
00394    if (entry->dev_major != 0 || entry->dev_minor != 0)
00395       return false;
00396    if (entry->inode != 0)
00397       return false;
00398 
00399    return true;
00400 }
00401 
00402 bool AuxvParser::readAuxvInfo()
00403 {
00404   /**
00405    * The location of the vsyscall is stored in /proc/PID/auxv in Linux 2.6.
00406    * auxv consists of a list of name/value pairs, ending with the AT_NULL
00407    * name.  There isn't a direct way to get the vsyscall info on Linux 2.4
00408    **/
00409   uint32_t *buffer32 = NULL;
00410   uint64_t *buffer64 = NULL;
00411   unsigned pos = 0;
00412   Address dso_start = 0x0, text_start = 0x0;
00413 
00414   struct {
00415     unsigned long type;
00416     unsigned long value;
00417   } auxv_entry;
00418 
00419   /**
00420    * Try to read from /proc/%d/auxv.  On Linux 2.4 systems auxv
00421    * doesn't exist, which is okay because vsyscall isn't used.
00422    * On latter 2.6 kernels the AT_SYSINFO field isn't present,
00423    * so we have to resort to more "extreme" measures.
00424    **/
00425   buffer64 = (uint64_t *) readAuxvFromProc();
00426   if (!buffer64) {
00427      buffer64 = (uint64_t *) readAuxvFromStack();
00428   }
00429   if (!buffer64) {
00430      return false;
00431   }
00432   buffer32 = (uint32_t *) buffer64;
00433   do {
00434      /**Fill in the auxv_entry structure.  We may have to do different
00435       * size reads depending on the address space.  No matter which
00436       * size we read, we'll fill the data in to auxv_entry, which may
00437       * involve a size shift up.
00438       **/
00439      if (addr_size == 4) {
00440         auxv_entry.type = (unsigned long) buffer32[pos];
00441         pos++;
00442         auxv_entry.value = (unsigned long) buffer32[pos];
00443         pos++;
00444      }
00445      else {
00446         auxv_entry.type = (unsigned long) buffer64[pos];
00447         pos++;
00448         auxv_entry.value = (unsigned long) buffer64[pos];
00449         pos++;
00450      }
00451  
00452      switch(auxv_entry.type) {
00453         case AT_SYSINFO:
00454            text_start = auxv_entry.value;
00455            break;
00456         case AT_SYSINFO_EHDR:
00457            dso_start = auxv_entry.value;
00458            break;
00459         case AT_PAGESZ:
00460            page_size = auxv_entry.value;
00461            break;
00462         case AT_BASE:
00463            interpreter_base = auxv_entry.value;
00464            break;
00465         case AT_PHDR:
00466            phdr = auxv_entry.value;
00467            break;
00468      }
00469     
00470   } while (auxv_entry.type != AT_NULL);
00471 
00472 
00473   if (buffer64)
00474      free(buffer64);
00475   if (!page_size)
00476      page_size = getpagesize();
00477 #if !defined(arch_x86) && !defined(arch_x86_64)
00478   //No vsyscall page needed or present
00479   return true;
00480 #endif
00481 
00482   /**
00483    * Even if we found dso_start in /proc/pid/auxv, the vsyscall 'page'
00484    * can be larger than a single page.  Thus we look through /proc/pid/maps
00485    * for known, default, or guessed start address(es).
00486    **/
00487   std::vector<Address> guessed_addrs;
00488   
00489   /* The first thing to check is the auxvinfo, if we have any. */
00490   if( dso_start != 0x0 ) 
00491      guessed_addrs.push_back( dso_start );
00492     
00493   /**
00494    * We'll make several educatbed attempts at guessing an address
00495    * for the vsyscall page.  After deciding on a guess, we'll try to
00496    * verify that using /proc/pid/maps.
00497    **/
00498   
00499   // Guess some constants that we've seen before.
00500 #if defined(arch_x86) 
00501   guessed_addrs.push_back(0xffffe000); //Many early 2.6 systems
00502   guessed_addrs.push_back(0xffffd000); //RHEL4
00503 #endif
00504 #if defined(arch_x86_64)
00505   guessed_addrs.push_back(0xffffffffff600000);
00506 #endif
00507 
00508   /**
00509    * Look through every entry in /proc/maps, and compare it to every 
00510    * entry in guessed_addrs.  If a guessed_addr looks like the right
00511    * thing, then we'll go ahead and call it the vsyscall page.
00512    **/
00513   unsigned num_maps;
00514   map_entries *secondary_match = NULL;
00515   map_entries *maps = getVMMaps(pid, num_maps);
00516   for (unsigned i=0; i<guessed_addrs.size(); i++) {
00517      Address addr = guessed_addrs[i];
00518      for (unsigned j=0; j<num_maps; j++) {
00519         map_entries *entry = &(maps[j]);
00520         if (addr < entry->start || addr >= entry->end)
00521            continue;
00522 
00523         if (dso_start == entry->start ||
00524             couldBeVsyscallPage(entry, true, page_size)) {
00525            //We found a possible page using a strict check. 
00526            // This is really likely to be it.
00527            vsyscall_base = entry->start;
00528            vsyscall_end = entry->end;
00529            vsyscall_text = text_start;
00530            found_vsyscall = true;
00531            free(maps);
00532            return true;
00533         }
00534 
00535         if (couldBeVsyscallPage(entry, false, page_size)) {
00536            //We found an entry that loosely looks like the
00537            // vsyscall page.  Let's hang onto this and return 
00538            // it if we find nothing else.
00539            secondary_match = entry;
00540         }
00541      }  
00542   }
00543 
00544   /**
00545    * There were no hits using our guessed_addrs scheme.  Let's
00546    * try to look at every entry in the maps table (not just the 
00547    * guessed addresses), and see if any of those look like a vsyscall page.
00548    **/
00549   for (unsigned i=0; i<num_maps; i++) {
00550      if (couldBeVsyscallPage(&(maps[i]), true, page_size)) {
00551         vsyscall_base = maps[i].start;
00552         vsyscall_end = maps[i].end;
00553         vsyscall_text = text_start;
00554         found_vsyscall = true;
00555         free(maps);
00556         return true;
00557      }
00558   }
00559 
00560   /**
00561    * Return any secondary possiblitiy pages we found in our earlier search.
00562    **/
00563   if (secondary_match) {
00564      vsyscall_base = secondary_match->start;
00565      vsyscall_end = secondary_match->end;
00566      vsyscall_text = text_start;
00567      found_vsyscall = true;
00568      free(maps);
00569      return true;
00570   }
00571 
00572   /**
00573    * Time to give up.  Sigh.
00574    **/
00575   found_vsyscall = false;
00576   free(maps);
00577   return false;
00578 }
00579 
00580 #if 0
00581 /**
00582  * get_word_at is a helper function for readAuxvFromStack.  It reads
00583  * a word out of the mutatee's stack via the debugger interface, and
00584  * it keeps the word cached for future reads.
00585  * The gwa_* global variables are basically parameters to get_word_at
00586  * and should be reset before every call
00587  *
00588  * gwa_buffer is a cache of data we've read before.  It's backwards 
00589  * for convenience; higher addresses are cached towards the base of gwa_buffer
00590  * and lower addresses are cached at the top.  This is because we read from
00591  * high addresses to low ones, but we want to start caching at the start of
00592  * gwa_buffer.
00593  **/
00594 static unsigned long *gwa_buffer = NULL;
00595 static unsigned gwa_size = 0; 
00596 static unsigned gwa_pos = 0;
00597 static unsigned long gwa_base_addr = 0;
00598 
00599 static unsigned long get_word_at(process *p, unsigned long addr, bool &err) {
00600    bool result;
00601    unsigned word_size = p->getAddressWidth();
00602    unsigned long word;
00603 
00604    /**
00605     * On AMD64 controlling 32-bit mutatee words are 32 bits long.
00606     * We don't want to deal with this now, so treat as a 64 bit read
00607     * (from aligned_addr) and then pick the correct 32 bits to return
00608     * at the end of this function.
00609     **/
00610    unsigned long aligned_addr = addr;
00611    if (word_size == 4 && sizeof(long) == 8 && addr % 8 == 4)
00612       aligned_addr -= 4;
00613 
00614    /**
00615     * Allocate gwa_buffer on first call
00616     **/
00617    if (gwa_buffer == NULL) {
00618       gwa_buffer = (unsigned long *) malloc(gwa_size);
00619    }
00620 
00621    /**
00622     * If gwa_buffer isn't big enough, grow it.
00623     **/
00624    if (gwa_base_addr - gwa_size >= aligned_addr) {
00625       while (gwa_base_addr - gwa_size >= aligned_addr)
00626          gwa_size = gwa_size * 2;
00627       gwa_buffer = (unsigned long *) realloc(gwa_buffer, gwa_size);
00628    }
00629 
00630    /**
00631     * Keep adding words to the cache (gwa_buffer) until we've cached
00632     * the word the user is interested in.
00633     **/
00634    while (gwa_base_addr - (gwa_pos * sizeof(long)) >= aligned_addr) {
00635       result = p->readDataSpace((void *) aligned_addr, sizeof(long), &word, false);
00636       if (!result) {
00637          err = true;
00638          return 0x0;
00639       }
00640       gwa_buffer[gwa_pos] = word;
00641       gwa_pos++;
00642    }
00643 
00644    /**
00645     * Return the word the user wants out of the cache.  'word' is the
00646     * long value we want to return.  On 64-bit mutator/32-bit mutatees
00647     * we may need to return a specific 32-bits of word.
00648     **/
00649    word = gwa_buffer[(gwa_base_addr - aligned_addr) / sizeof(long)];
00650 
00651    if (word_size == 4 && sizeof(long) == 8 && addr % 8 == 4) {
00652       //64-bit mutator, 32 bit mutatee, looking for unaligned word
00653       uint32_t *words = (uint32_t *) &word;
00654       return (long) words[1];
00655    }
00656    else if (word_size == 4 && sizeof(long) == 8)
00657    {
00658       //64-bit mutator, 32 bit mutatee, looking for aligned word
00659       uint32_t *words = (uint32_t *) &word;
00660       return (long) words[0];
00661    }
00662    else
00663    {
00664       //mutator and mutatee are same size
00665       return word;
00666    }
00667 }
00668 
00669 
00670 /**
00671  * Another helper function for readAuxvInfoFromStack.  We want to know
00672  * the top byte of the stack.  Unfortunately, if we're running this it's
00673  * probably because /proc/PID/ isn't reliable, so we can't use maps.  
00674  * Check the machine's stack pointer, page align it, and start walking
00675  * back looking for an unaccessible page.
00676  **/
00677 static Address getStackTop(AddrSpaceReader *proc, bool &err) {
00678    Address stack_pointer;
00679    Address pagesize = getpagesize();
00680    bool result;
00681    long word;
00682    err = false;
00683 
00684 
00685    stack_pointer = proc->readRegContents(PTRACE_REG_SP);
00686    dyn_lwp *init_lwp = proc->getInitialLwp();
00687    if (!init_lwp) {
00688       err = true;
00689       return 0x0;
00690    }
00691 
00692    Frame frame = init_lwp->getActiveFrame();
00693    stack_pointer = frame.getSP();
00694    if (!stack_pointer) {
00695       err = true;
00696       return 0x0;
00697    }
00698    
00699    //Align sp to pagesize
00700    stack_pointer = (stack_pointer & ~(pagesize - 1)) + pagesize;
00701    
00702    //Read pages until we get to an unmapped page
00703    for (;;) {
00704       result = proc->readDataSpace((void *) stack_pointer, sizeof(long), &word, 
00705                                    false);
00706       if (!result) {
00707          break;
00708       }
00709       stack_pointer += pagesize;
00710    }
00711 
00712    //The vsyscall page sometimes hangs out above the stack.  Test if this
00713    // page is it, then move back down one if it is.
00714    char pagestart[4];
00715    result = proc->readDataSpace((void *) (stack_pointer - pagesize), 4, pagestart, 
00716                                 false);
00717    if (result) {
00718       if (pagestart[0] == 0x7F && pagestart[1] == 'E' && 
00719           pagestart[2] == 'L' &&  pagestart[3] == 'F') 
00720       {
00721          stack_pointer -= pagesize;
00722       }
00723    }
00724 
00725    return stack_pointer;
00726 }
00727 
00728 /**
00729  * We can't read /proc/PID/auxv for some reason (BProc is a likely candidate).
00730  * We'll instead pull this data from the mutatee's stack.  On Linux the top of
00731  * the stack at process startup is arranged like the following:
00732  *          -------------------------------------
00733  * esp ->   |                argc               |
00734  *          |               argv[0]             |
00735  *          |                ...                |
00736  *          |               argv[n]             |
00737  *          |                                   |
00738  *          |               envp[0]             |
00739  *          |                ...                |
00740  *          |               envp[n]             |
00741  *          |                NULL               |
00742  *          |                                   |
00743  *          |  { auxv[0].type, auxv[0].value }  |   
00744  *          |                ...                |
00745  *          |  { auxv[n].type, auxv[n].value }  | 
00746  *          |  {      NULL   ,     NULL      }  |
00747  *          |                                   |
00748  *          |      Some number of NULL words    |
00749  *          |        Strings for argv[]         |
00750  *          |        Strings for envp[]         |
00751  *          |                NULL               |
00752  *          -------------------------------------
00753  *
00754  * We want to get at the name/value pairs of auxv.  Unfortunately,
00755  * if we're attaching the stack pointer has probably moved.  Instead
00756  * we'll try to read from the bottom up, which is more difficult.
00757  * argv[] and envp[] are pointers to the strings at the bottom of
00758  * the stack.  We'll search backwards for these pointers, then move back
00759  * down until we think we have the auxv array.  Yea us.
00760  **/
00761 void *AuxvParser::readAuxvFromStack(process *proc) {
00762    gwa_buffer = NULL;
00763    gwa_size = 1024 * 1024; //One megabyte default
00764    gwa_pos = 0;
00765    unsigned word_size = proc->getAddressWidth();
00766    bool err = false;
00767 
00768    // Get the base address of the mutatee's stack.  For example,
00769    //  on many standard linux/x86 machines this will return 
00770    //  0xc0000000
00771    gwa_base_addr = getStackTop(proc, err);
00772    if (err) 
00773       return NULL;
00774    gwa_base_addr -= word_size;
00775    
00776    unsigned long current = gwa_base_addr;
00777    unsigned long strings_start, strings_end;
00778    unsigned long l1, l2, auxv_start, word;
00779    unsigned char *buffer = NULL;
00780    unsigned bytes_to_read;
00781 
00782    // Go through initial NULL word
00783    while (get_word_at(proc, current, err) == 0x0) {
00784       if (err) goto done_err;
00785       current -= word_size;
00786    }
00787 
00788    // Go through the auxv[] and envp[] strings
00789    strings_end = current;
00790    while (get_word_at(proc, current, err) != 0x0) {
00791       if (err) goto done_err;
00792       current -= word_size;
00793    }
00794    strings_start = current + word_size;
00795    
00796    //Read until we find a pair of pointers into the strings 
00797    // section, this should mean we're now above the auxv vector
00798    // and in envp or argv
00799    for (;;) {
00800       l1 = get_word_at(proc, current, err);
00801       if (err) goto done_err;
00802       l2 = get_word_at(proc, current - word_size, err);
00803       if (err) goto done_err;
00804       if (l1 >= strings_start && l1 < strings_end && 
00805           l2 >= strings_start && l2 < strings_end)
00806          break;
00807       current -= word_size;
00808    }
00809 
00810    //Read back down until we get to the end of envp[]
00811    while (get_word_at(proc, current, err) != 0x0) {
00812       if (err) goto done_err;
00813       current += word_size;
00814    }
00815    //Through the NULL byte before auxv..
00816    while (get_word_at(proc, current, err) == 0x0) {
00817       if (err) goto done_err;
00818       current += word_size;
00819    }
00820 
00821    //Success. Found the start of auxv.
00822    auxv_start = current;
00823 
00824    //Read auxv into buffer
00825    bytes_to_read = strings_start - auxv_start;
00826    buffer = (unsigned char *) malloc(bytes_to_read + word_size*2);
00827    if (!buffer)
00828       goto done_err;   
00829    for (unsigned pos = 0; pos < bytes_to_read; pos += word_size) {
00830       word = get_word_at(proc, auxv_start + pos, err);
00831       if (err) goto done_err;
00832       if (word_size == 4)
00833          *((uint32_t *) (buffer + pos)) = (uint32_t) word;
00834       else
00835          *((unsigned long *) (buffer + pos)) = word;
00836    }
00837 
00838    goto done;
00839 
00840  done_err:
00841    if (buffer)
00842       free(buffer);
00843    buffer = NULL;
00844  done:
00845    if (gwa_buffer)
00846       free(gwa_buffer);
00847    return (void *) buffer;
00848 }
00849 
00850 #else
00851 
00852 void *AuxvParser::readAuxvFromStack() {
00853    /**
00854     * Disabled, for now.  Re-enable if /proc/pid/auxv doesn't exist.
00855     **/
00856    return NULL;
00857 }
00858 
00859 #endif
00860 
00861 #define READ_BLOCK_SIZE (1024 * 5)
00862 void *AuxvParser::readAuxvFromProc() {
00863    char filename[64];
00864    unsigned char *buffer = NULL;
00865    unsigned char *temp;
00866    unsigned buffer_size = READ_BLOCK_SIZE;
00867    unsigned pos = 0;
00868    ssize_t result = 0;
00869    int fd = -1;
00870 
00871    sprintf(filename, "/proc/%d/auxv", pid);
00872    fd = open(filename, O_RDONLY, 0);
00873    if (fd == -1)
00874       goto done_err;
00875 
00876    buffer = (unsigned char *) malloc(buffer_size);
00877    if (!buffer) {
00878       goto done_err;
00879    }
00880 
00881    for (;;) {
00882       result = read(fd, buffer + pos, READ_BLOCK_SIZE);
00883       if (result == -1) {
00884          perror("Couldn't read auxv entry");
00885          goto done_err;
00886       }
00887       else if (!result && !pos) {
00888          //Didn't find any data to read
00889          perror("Could read auxv entry");
00890          goto done_err;
00891       }
00892       else if (result < READ_BLOCK_SIZE) {
00893          //Success
00894          goto done;
00895       }
00896       else if (result == READ_BLOCK_SIZE) {
00897          //WTF... 5k wasn't enough for auxv?
00898          buffer_size *= 2;
00899          temp = (unsigned char *) realloc(buffer, buffer_size);
00900          if (!temp)
00901             goto done_err;
00902          buffer = temp;
00903          pos += READ_BLOCK_SIZE;
00904       }
00905       else {
00906          fprintf(stderr, "[%s:%u] - Unknown error reading auxv\n",
00907                  __FILE__, __LINE__);
00908          goto done_err;
00909       }
00910    }
00911       
00912    done_err:
00913       if (buffer)
00914          free(buffer);
00915       buffer = NULL;
00916    done:
00917       if (fd != -1)
00918          close(fd);
00919       return buffer;
00920 }
00921 
00922 
00923 #define LINE_LEN 1024
00924 map_entries *getVMMaps(int pid, unsigned &maps_size) {
00925    char line[LINE_LEN], prems[16], *s;
00926    int result;
00927    int fd = -1;
00928    map_entries *maps = NULL;
00929    unsigned i, no_lines = 0, cur_pos = 0, cur_size = 4096;
00930    unsigned file_size = 0;
00931    char *buffer = NULL;
00932   
00933    sprintf(line, "/proc/%d/maps", pid);
00934    fd = open(line, O_RDONLY);
00935    if (fd == -1)
00936       goto done_err;
00937    
00938    cur_pos = 0;
00939    buffer = (char *) malloc(cur_size);
00940    if (!buffer) {
00941       goto done_err;
00942    }
00943    for (;;) {
00944       result = read(fd, buffer+cur_pos, cur_size - cur_pos);
00945       if (result == -1) {
00946          goto done_err;
00947       }
00948       cur_pos += result;
00949       if (result == 0) {
00950          break;
00951       }
00952       assert(cur_pos <= cur_size);
00953       if (cur_size == cur_pos) {
00954          cur_size *= 2;
00955          buffer = (char *) realloc(buffer, cur_size);
00956          if (!buffer) {
00957             goto done_err;
00958          }
00959       }
00960    }
00961    file_size = cur_pos;
00962 
00963    close(fd);
00964    fd = -1;
00965    //Calc num of entries needed and allocate the buffer.  Assume the 
00966    //process is stopped.
00967    no_lines = file_size ? 1 : 0;
00968    for (i = 0; i < file_size; i++) {
00969       if (buffer[i] == '\n')
00970          no_lines++;
00971    } 
00972 
00973    maps = (map_entries *) malloc(sizeof(map_entries) * (no_lines+1));
00974    memset(maps, 0, sizeof(map_entries) * (no_lines+1));
00975    if (!maps)
00976       goto done_err;
00977 
00978    //Read all of the maps entries
00979    cur_pos = 0;
00980    for (i = 0; i < no_lines; i++) {
00981       if (cur_pos >= file_size)
00982          break;
00983       unsigned next_end = cur_pos;
00984       while (buffer[next_end] != '\n' && next_end < file_size) next_end++;
00985       unsigned int line_size = (next_end - cur_pos) > LINE_LEN ? LINE_LEN : (next_end - cur_pos);
00986       memcpy(line, buffer+cur_pos, line_size);
00987       line[line_size] = '\0';
00988       line[LINE_LEN - 1] = '\0';
00989       cur_pos = next_end+1;
00990 
00991       sscanf(line, "%lx-%lx %16s %lx %x:%x %u %" MAPENTRIES_PATH_SIZE_STR "s\n", 
00992              (Address *) &maps[i].start, (Address *) &maps[i].end, prems, 
00993              (Address *) &maps[i].offset, &maps[i].dev_major,
00994              &maps[i].dev_minor, &maps[i].inode, maps[i].path);
00995       maps[i].prems = 0;
00996       for (s = prems; *s != '\0'; s++) {
00997          switch (*s) {
00998             case 'r':
00999                maps[i].prems |= PREMS_READ;
01000                break;
01001             case 'w':
01002                maps[i].prems |= PREMS_WRITE;
01003                break;
01004             case 'x':
01005                maps[i].prems |= PREMS_EXEC;
01006                break;
01007             case 'p':
01008                maps[i].prems |= PREMS_PRIVATE;
01009                break;
01010             case 's':
01011                maps[i].prems |= PREMS_EXEC;
01012                break;
01013          }
01014       }
01015    }
01016    //Zero out the last entry
01017    memset(&(maps[i]), 0, sizeof(map_entries));
01018    maps_size = i;
01019 
01020    free(buffer);
01021    return maps;
01022 
01023  done_err:
01024    if (fd != -1)
01025       close(fd);
01026    if (buffer)
01027       free(buffer);
01028    return NULL;
01029 }
01030 
01031 bool findProcLWPs(pid_t pid, std::vector<pid_t> &lwps)
01032 {
01033    char name[32];
01034    struct dirent *direntry;
01035 
01036    /**
01037     * Linux 2.6:
01038     **/
01039    snprintf(name, 32, "/proc/%d/task", pid);
01040    DIR *dirhandle = opendir(name);
01041    if (dirhandle)
01042    {
01043       //Only works on Linux 2.6
01044       while((direntry = readdir(dirhandle)) != NULL) {
01045          unsigned lwp_id = atoi(direntry->d_name);
01046          if (lwp_id) 
01047             lwps.push_back(lwp_id);
01048       }
01049       closedir(dirhandle);
01050       return true;
01051    }
01052    /**
01053     * Linux 2.4:
01054     *
01055     * PIDs that are created by pthreads have a '.' prepending their name
01056     * in /proc.  We'll check all of those for the ones that have this lwp
01057     * as a parent pid.
01058     **/
01059    dirhandle = opendir("/proc");
01060    if (!dirhandle)
01061    {
01062       //No /proc directory.  I give up.  No threads for you.
01063       return false;
01064    } 
01065    while ((direntry = readdir(dirhandle)) != NULL)
01066    {
01067       if (direntry->d_name[0] != '.') {
01068          //fprintf(stderr, "%s[%d]: Skipping entry %s\n", FILE__, __LINE__, direntry->d_name);
01069          continue;
01070       }
01071       unsigned lwp_id = atoi(direntry->d_name+1);
01072       int lwp_ppid;
01073       if (!lwp_id) 
01074          continue;
01075       sprintf(name, "/proc/%d/status", lwp_id);
01076       FILE *fd = P_fopen(name, "r");
01077       if (!fd) {
01078          continue;
01079      }
01080      char buffer[1024];
01081      while (fgets(buffer, 1024, fd)) {
01082          if (strncmp(buffer, "Tgid", 4) == 0) {
01083              sscanf(buffer, "%*s %d", &lwp_ppid);
01084              break;
01085          }
01086      }
01087 
01088      fclose(fd);
01089 
01090      if (lwp_ppid != pid) {
01091          continue;
01092      }
01093      lwps.push_back(lwp_id);
01094   }
01095   closedir(dirhandle);
01096   lwps.push_back(pid);
01097   
01098   return true;
01099 }
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 12 Jul 2013 for SymtabAPI by  doxygen 1.6.1