SystemTap.C

Go to the documentation of this file.
#include "elf/src/SystemTap.h"
#include "elf/h/Elf_X.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_parse.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/tokenizer.hpp>

#include <cstring>
#include <iostream>
#include <string>
#include <set>
#include <vector>
#include <map>

#include <elf.h>
00019 
00020 using namespace std;
00021 
00022 namespace qi = boost::spirit::qi;
00023 namespace phoenix = boost::phoenix;
00024 namespace ascii = boost::spirit::ascii;
00025 
00026 namespace Dyninst {
00027 
00028 struct OperandParser {
00029    typedef qi::rule<string::const_iterator, ArgTree::ptr(), ascii::space_type> ArgTreeRule;
00030 
00031    std::map<std::string, Dyninst::MachRegister> register_names;
00032    ArgTree::ptr getReg(std::string name);
00033 
00034    ArgTree::ptr newConstant(const signed long &i);
00035    ArgTree::ptr newDeref(ArgTree::ptr p);
00036    ArgTree::ptr newSegment(ArgTree::ptr a, ArgTree::ptr b);
00037    ArgTree::ptr identity(ArgTree::ptr p);
00038    ArgTree::ptr newDerefAdd(ArgTree::ptr a, ArgTree::ptr b);
00039    ArgTree::ptr newBaseIndexScale(ArgTree::ptr base, ArgTree::ptr index, int scale);
00040    ArgTree::ptr newAdd(ArgTree::ptr a, ArgTree::ptr b);
00041    ArgTree::ptr newRegister(const std::vector<char> &v);   
00042 };
00043 
00044 struct x86OperandParser : public qi::grammar<std::string::const_iterator, ArgTree::ptr(), ascii::space_type>,
00045                           public OperandParser
00046 {
00047    ArgTreeRule operand, shex, reg, mem_modrm_nobase, mem_modrm, modrm;
00048    
00049    void createRegisterNames(Dyninst::Architecture arch);
00050    x86OperandParser(Dyninst::Architecture arch);
00051 };
00052 
00053 struct ppcOperandParser : public qi::grammar<std::string::const_iterator, ArgTree::ptr(), ascii::space_type>,
00054                           public OperandParser
00055 {
00056    ArgTreeRule operand, shex, num, reg;
00057    
00058    void createRegisterNames(Dyninst::Architecture arch);
00059    ppcOperandParser(Dyninst::Architecture arch);
00060 };
00061 
00062 }
00063 
00064 using namespace Dyninst;
00065 
00066 map<Elf_X *, SystemTapEntries *> SystemTapEntries::all_entries;
00067 SystemTapEntries *SystemTapEntries::createSystemTapEntries(Elf_X *file_)
00068 {
00069    map<Elf_X *, SystemTapEntries *>::iterator i = all_entries.find(file_);
00070    if (i != all_entries.end())
00071       return i->second;
00072 
00073    SystemTapEntries *st = new SystemTapEntries(file_);
00074    bool result = st->parse();
00075    if (!result) {
00076       delete st;
00077       st = NULL;
00078    }
00079    
00080    all_entries.insert(make_pair(file_, st));
00081    return st;
00082 }
00083 
00084 SystemTapEntries::SystemTapEntries(Elf_X *file_) :
00085    file(file_)
00086 {
00087 }
00088 
00089 SystemTapEntries::~SystemTapEntries()
00090 {
00091 }
00092 
00093 bool SystemTapEntries::parse() {
00094    switch (file->e_machine()) {
00095       case EM_386:
00096          arch = Arch_x86;
00097          break;
00098       case EM_X86_64:
00099          arch = Arch_x86_64;
00100          break;
00101       case EM_PPC:
00102          arch = Arch_ppc32;
00103          break;
00104       case EM_PPC64:
00105          arch = Arch_ppc64;
00106          break;
00107    }   
00108    word_size = getArchAddressWidth(arch);
00109 
00110    return parseAllNotes();
00111 }
00112 
00113 bool SystemTapEntries::parseAllNotes()
00114 {
00115    for (unsigned short i = 0; i < file->e_shnum(); i++) {
00116       Elf_X_Shdr &shdr = file->get_shdr(i);
00117       if (!shdr.isValid())
00118          continue;
00119       if (shdr.sh_type() != SHT_NOTE)
00120          continue;
00121       
00122       bool result = parseNotes((const void *) shdr.get_data().d_buf(), shdr.sh_size());
00123       if (!result) 
00124          return false;
00125    }
00126 
00127    return true;
00128 }
00129 
00130 #if !defined(_SDT_NOTE_TYPE)
00131 #define SDT_NOTE_TYPE 3
00132 #endif
00133 #if !defined(_SDT_NOTE_NAME) 
00134 #define SDT_NOTE_NAME "stapsdt"
00135 #endif
00136 
00137 bool SystemTapEntries::parseNotes(const void *notes, size_t size)
00138 {
00139    unsigned i=0;
00140    const unsigned char *buffer = (const unsigned char *) notes;
00141    bool parseError = false;
00142 
00143    while (i < size) {
00144       Entry e;
00145 
00146       //Elf32_Nhdr and Elf64_Nhdr are the same structs with different names
00147       // Just using Elf32_Nhdr
00148       Elf32_Nhdr *header = (Elf32_Nhdr *) (buffer+i);
00149       i += sizeof(Elf32_Nhdr);
00150 
00151       //Skip note name, then align
00152       const char *note_name = (const char *) (buffer + i);
00153       i += header->n_namesz;
00154       while (i % 4 != 0) i++;
00155 
00156       if (header->n_type != SDT_NOTE_TYPE) {
00157          i += header->n_descsz;
00158          continue;
00159       }
00160       if (strcmp(SDT_NOTE_NAME, note_name) != 0) {
00161          i += header->n_descsz;
00162          continue;
00163       }
00164 
00165 
00166       //System tap structure format looks like:
00167       // struct {
00168       //   Address addr;
00169       //   Address base_addr;
00170       //   Address semaphore_addr;
00171       //   char provider[]
00172       //   char probe_name[]
00173       //   char argument_string[]
00174       // }
00175       bool result = readAddr(buffer, size, i, e.addr);
00176       if (!result) {
00177          parseError = true;
00178          break;
00179       }
00180       result = readAddr(buffer, size, i, e.base_addr);
00181       if (!result) {
00182          parseError = true;
00183          break;
00184       }
00185       result = readAddr(buffer, size, i, e.semaphore_addr);
00186       if (!result) {
00187          parseError = true;
00188          break;
00189       }
00190       result = readString(buffer, size, i, e.provider);
00191       if (!result) {
00192          parseError = true;
00193          break;
00194       }
00195       std::string name;
00196       result = readString(buffer, size, i, name);
00197       if (!result) {
00198          parseError = true;
00199          break;
00200       }
00201       std::string args;
00202       result = readString(buffer, size, i, args);
00203       if (!result) {
00204          parseError = true;
00205          break;
00206       }
00207       while (i % 4 != 0) i++;
00208 
00209       result = parseOperands(args, e);
00210       if (!result) {
00211          parseError = true;
00212          break;
00213       }
00214 
00215       Entry *entry = new Entry(e);
00216       name_to_entry.insert(make_pair(name, entry));
00217    }
00218 
00219    return !parseError;
00220 }
00221 
00222 bool SystemTapEntries::readAddr(const unsigned char *buffer, size_t bsize, unsigned &offset, 
00223                                 Dyninst::Address &result, unsigned int read_size)
00224 {
00225    if (!read_size) {
00226       read_size = word_size;
00227    }
00228    if (offset + read_size > bsize)
00229       return false;
00230 
00231    if (read_size == 4)
00232       result = *((uint32_t *) (buffer + offset));
00233    else if (read_size == 8)
00234       result = *((uint64_t *) (buffer + offset));
00235    offset += read_size;
00236 
00237    return true;
00238 }
00239 
00240 bool SystemTapEntries::readString(const unsigned char *buffer, size_t bsize, unsigned &offset, 
00241                                   std::string &result)
00242 {
00243    unsigned int start = offset;
00244    unsigned int end = start;
00245 
00246    if (start >= bsize)
00247       return false;
00248 
00249    while (buffer[end] != '\0' && end < bsize) end++;
00250    result = std::string(((const char *) buffer)+start, end-start);
00251    offset = end+1;
00252    return true;
00253 }
00254 
00255 bool SystemTapEntries::parseOperands(std::string ops, Entry &entry)
00256 {
00257    if (ops.empty() || ops == string(":")) {
00258       //Empty operand list
00259       return true;
00260    }
00261 
00262    typedef boost::tokenizer<boost::char_separator<char> > tok_t;
00263    boost::char_separator<char> sep(" ");
00264    tok_t tokens(ops, sep);
00265    
00266    for (tok_t::iterator i = tokens.begin(); i != tokens.end(); i++) {
00267       Arg result;
00268       string arg = *i;
00269 
00270       string operand = arg;
00271       result.arg_size = 0;
00272       result.is_arg_signed = false;
00273 
00274       //Extract the type info encoded in the <int> of the form <int>@<arg>
00275       size_t at_pos = arg.find('@');
00276       if (at_pos != string::npos) {
00277          string type_info_str(operand, 0, at_pos);
00278          if (!type_info_str.empty()) {
00279             char *endptr = NULL;
00280             signed long type_info = strtol(type_info_str.c_str(), &endptr, 10);
00281             if (*endptr == '\0') {
00282                //We have the type info in integer form;
00283                result.arg_size = abs(type_info);
00284                result.is_arg_signed = (type_info < 0);
00285                operand = string(arg, at_pos+1);
00286             }
00287          }
00288       }
00289       bool bres = true;
00290       if (arch == Arch_x86 || arch == Arch_x86_64)
00291          bres = parseOperand_x86(operand, result);
00292       else if (arch == Arch_ppc32 || arch == Arch_ppc64)
00293          bres = parseOperand_ppc(operand, result);
00294 
00295       if (!bres) {
00296          return false;
00297       }
00298       entry.args.push_back(result);
00299    }
00300 
00301    return true;
00302 }
00303 
00304 x86OperandParser *SystemTapEntries::x86_parser = NULL;
00305 x86OperandParser *SystemTapEntries::x86_64_parser = NULL;
00306 ppcOperandParser *SystemTapEntries::ppc32_parser = NULL;
00307 ppcOperandParser *SystemTapEntries::ppc64_parser = NULL;
00308 
00309 bool SystemTapEntries::parseOperand_x86(std::string op, Arg &arg)
00310 {
00311    x86OperandParser* &parser = (arch == Arch_x86) ? x86_parser : x86_64_parser;
00312    if (!parser)
00313       parser = new x86OperandParser(arch);
00314 
00315    using boost::spirit::ascii::space;
00316    std::string::const_iterator iter = op.begin();
00317    std::string::const_iterator end = op.end();
00318    bool result = phrase_parse(iter, end, *parser, space, arg.tree);
00319    if (!result || iter != end) {
00320       //Failed parse.
00321       return false;
00322    }
00323    return true;
00324 }
00325 
00326 bool SystemTapEntries::parseOperand_ppc(std::string op, Arg &arg)
00327 {
00328    ppcOperandParser* &parser = (arch == Arch_ppc32) ? ppc32_parser : ppc64_parser;
00329    if (!parser)
00330       parser = new ppcOperandParser(arch);
00331 
00332    using boost::spirit::ascii::space;
00333    std::string::const_iterator iter = op.begin();
00334    std::string::const_iterator end = op.end();
00335    bool result = phrase_parse(iter, end, *parser, space, arg.tree);
00336    if (!result || iter != end) {
00337       //Failed parse.
00338       return false;
00339    }
00340    return true;
00341 }
00342 
00343 ArgTree::ptr OperandParser::getReg(std::string name) {
00344    std::map<std::string, Dyninst::MachRegister>::iterator i = register_names.find(name);
00345    if (i == register_names.end())
00346       return ArgTree::createRegister(Dyninst::InvalidReg);
00347    return ArgTree::createRegister(i->second);
00348 }
00349    
00350 //Wrappers to fix odd compiler errors from phoenix::bind
00351 ArgTree::ptr OperandParser::newConstant(const signed long &i) {
00352    return ArgTree::createConstant(i);
00353 }
00354 
00355 ArgTree::ptr OperandParser::newDeref(ArgTree::ptr p) { 
00356    return ArgTree::createDeref(p);
00357 }
00358 
00359 ArgTree::ptr OperandParser::newSegment(ArgTree::ptr a, ArgTree::ptr b) {
00360    return ArgTree::createSegment(a, b);
00361 }
00362 
00363 ArgTree::ptr OperandParser::identity(ArgTree::ptr p) { 
00364    return p;
00365 }
00366 
00367 ArgTree::ptr OperandParser::newDerefAdd(ArgTree::ptr a, ArgTree::ptr b) {
00368    return ArgTree::createDeref(ArgTree::createAdd(a, b));
00369 }
00370 
00371 ArgTree::ptr OperandParser::newBaseIndexScale(ArgTree::ptr base, ArgTree::ptr index, int scale) {
00372    return ArgTree::createAdd(base, ArgTree::createMultiply(index, ArgTree::createConstant(scale)));
00373 }
00374 
00375 ArgTree::ptr OperandParser::newAdd(ArgTree::ptr a, ArgTree::ptr b) {
00376    return ArgTree::createAdd(a, b);
00377 }
00378 
00379 ArgTree::ptr OperandParser::newRegister(const std::vector<char> &v) { 
00380    std::string s;
00381    for (std::vector<char>::const_iterator i = v.begin(); i != v.end(); i++) s += *i;
00382    return getReg(s); 
00383 }
00384    
00385 x86OperandParser::x86OperandParser(Dyninst::Architecture arch) : 
00386    x86OperandParser::base_type(operand)
00387 {
00388    using namespace qi::labels;
00389    using qi::uint_;
00390    using qi::int_;
00391    using qi::lit;
00392    using qi::hex;
00393    using qi::alnum;
00394 
00395    using phoenix::construct;
00396    using phoenix::val;
00397    using boost::phoenix::ref;
00398 
00399    createRegisterNames(arch);
00400 
00401    shex = 
00402       lit("0x") >> hex        [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00403       | lit("-0x") >> hex     [qi::_val = phoenix::bind(&OperandParser::newConstant, this, -1*qi::_1)]
00404       | int_                  [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00405       ;
00406          
00407    reg = '%' >> (+alnum)      [qi::_val = phoenix::bind(&OperandParser::newRegister, this, qi::_1)];
00408          
00409    mem_modrm_nobase = 
00410       ('(' >> reg >> ')')     [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00411       | ( '(' >> reg >> ',' >> reg >> ',' >> uint_ >> ')' )
00412       [qi::_val = phoenix::bind(&OperandParser::newBaseIndexScale, this, qi::_1, qi::_2, qi::_3)]
00413       ;
00414 
00415    mem_modrm = 
00416       (shex >> mem_modrm_nobase)  [qi::_val = phoenix::bind(&OperandParser::newDerefAdd, this, qi::_1, qi::_2)]
00417       | mem_modrm_nobase          [qi::_val = phoenix::bind(&OperandParser::newDeref, this, qi::_1)]
00418       | shex                      [qi::_val = phoenix::bind(&OperandParser::newDeref, this, qi::_1)]
00419 
00420       ;
00421          
00422    modrm =
00423       reg             [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00424       | mem_modrm     [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00425       | '$' >> shex   [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00426       ;
00427          
00428    operand = 
00429       modrm                    [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00430       | (reg >> ':' >> modrm)  [qi::_val = phoenix::bind(&OperandParser::newSegment, this, qi::_1, qi::_2)]
00431       ;
00432 }
00433 
00434 void x86OperandParser::createRegisterNames(Dyninst::Architecture arch) {
00435    Dyninst::MachRegister::NameMap::iterator i = Dyninst::MachRegister::names()->begin();
00436    for (; i != Dyninst::MachRegister::names()->end(); i++) {
00437       Dyninst::MachRegister reg(i->first);
00438             
00439       if (reg.getArchitecture() != arch) {
00440          continue;
00441       }
00442       unsigned int gpr_code = (arch == Dyninst::Arch_x86 ? Dyninst::x86::GPR : Dyninst::x86_64::GPR);
00443       unsigned int seg_code = (arch == Dyninst::Arch_x86 ? Dyninst::x86::SEG : Dyninst::x86_64::SEG);
00444       if (reg.regClass() != gpr_code &&
00445           reg.regClass() != seg_code &&
00446           !reg.isPC()) {
00447          continue;
00448       }
00449       std::string full_reg_name = reg.name();
00450       size_t pos = full_reg_name.find("::");
00451       std::string reg_name = std::string(full_reg_name, pos+2);
00452       register_names[reg_name] = reg;
00453    }
00454 }
00455 
00456 
00457 ppcOperandParser::ppcOperandParser(Dyninst::Architecture arch) :
00458    ppcOperandParser::base_type(operand)
00459 {
00460    using namespace qi::labels;
00461    using qi::uint_;
00462    using qi::int_;
00463    using qi::lit;
00464    using qi::hex;
00465    using qi::alnum;
00466 
00467    using phoenix::construct;
00468    using phoenix::val;
00469    using boost::phoenix::ref;
00470 
00471    createRegisterNames(arch);
00472 
00473    shex = 
00474       lit("0x") >> hex        [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00475       | lit("-0x") >> hex     [qi::_val = phoenix::bind(&OperandParser::newConstant, this, -1*qi::_1)]
00476       ;
00477 
00478    num = int_                 [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00479       ;
00480          
00481    reg = 'r' >> (+alnum)      [qi::_val = phoenix::bind(&OperandParser::newRegister, this, qi::_1)];
00482       ;
00483 
00484    operand = 
00485       reg                     [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00486       | (num >> lit("(") >> reg >> lit(")"))
00487                               [qi::_val = phoenix::bind(&OperandParser::newDerefAdd, this, qi::_1, qi::_2)]
00488       | shex                  [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00489       ;
00490 }
00491 
00492 void ppcOperandParser::createRegisterNames(Dyninst::Architecture arch)
00493 {
00494    Dyninst::MachRegister::NameMap::iterator i = Dyninst::MachRegister::names()->begin();
00495    for (; i != Dyninst::MachRegister::names()->end(); i++) {
00496       Dyninst::MachRegister reg(i->first);
00497             
00498       if (reg.getArchitecture() != arch) {
00499          continue;
00500       }
00501       unsigned int gpr_code = (arch == Dyninst::Arch_ppc32 ? Dyninst::ppc32::GPR : Dyninst::ppc64::GPR);
00502       if (reg.regClass() != gpr_code) {
00503          continue;
00504       }
00505       std::string full_reg_name = reg.name();
00506       size_t pos = full_reg_name.find("::r");
00507       std::string reg_name = std::string(full_reg_name, pos+3);
00508       register_names[reg_name] = reg;
00509    }
00510 }
00511 
00512 void ArgTree::print(FILE *f)
00513 {
00514    switch (op_type) {
00515       case Register:
00516          fprintf(f, "%%%s", Dyninst::MachRegister(op_data.reg).name().c_str());
00517          break;
00518       case Constant:
00519          fprintf(f, "%ld", op_data.val);
00520          break;
00521       case Add:
00522          fprintf(f, "(");
00523          lchild->print(f);
00524          fprintf(f, " + ");
00525          rchild->print(f);
00526          fprintf(f, ")");
00527          break;
00528       case Multiply:
00529          fprintf(f, "(");
00530          lchild->print(f);
00531          fprintf(f, " * ");
00532          rchild->print(f);
00533          fprintf(f, ")");
00534          break;
00535       case Dereference:
00536          fprintf(f, "*(");
00537          lchild->print(f);
00538          fprintf(f, ")");
00539          break;
00540       case Segment:
00541          lchild->print(f);
00542          fprintf(f, ":");
00543          rchild->print(f);
00544          break;
00545    }
00546 }
00547 
00548 ArgTree::ptr ArgTree::createConstant(const signed long &v) {
00549    ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00550    ret->op_type = Constant;
00551    ret->op_data.val = v;
00552    return ret;
00553 }         
00554 
00555 ArgTree::ptr ArgTree::createRegister(Dyninst::MachRegister r) {
00556    ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00557    ret->op_type = Register;
00558    ret->op_data.reg = r;
00559    return ret;
00560 }
00561 
00562 ArgTree::ptr ArgTree::createDeref(ArgTree::ptr sub) {
00563    ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00564    ret->op_type = Dereference;
00565    ret->op_data.val = 0;
00566    ret->lchild = sub;
00567    return ret;
00568 }
00569 
00570 ArgTree::ptr ArgTree::createSegment(ArgTree::ptr l, ArgTree::ptr r) {
00571    ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00572    ret->op_type = Segment;
00573    ret->op_data.val = 0;
00574    ret->lchild = l;
00575    ret->rchild = r;
00576    return ret;
00577 }
00578 
00579 ArgTree::ptr ArgTree::createAdd(ArgTree::ptr l, ArgTree::ptr r) {
00580    ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00581    ret->op_type = Add;
00582    ret->op_data.val = 0;
00583    ret->lchild = l;
00584    ret->rchild = r;
00585    return ret;
00586 }
00587 
00588 ArgTree::ptr ArgTree::createMultiply(ArgTree::ptr l, ArgTree::ptr r) {
00589    ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00590    ret->op_type = Multiply;
00591    ret->op_data.val = 0;
00592    ret->lchild = l;
00593    ret->rchild = r;
00594    return ret;
00595 }
00596 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 12 Jul 2013 for SymtabAPI by  doxygen 1.6.1