#include "elf/src/SystemTap.h"
#include "elf/h/Elf_X.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_parse.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/tokenizer.hpp>

#include <cstring>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

#include <elf.h>
00019
00020 using namespace std;
00021
00022 namespace qi = boost::spirit::qi;
00023 namespace phoenix = boost::phoenix;
00024 namespace ascii = boost::spirit::ascii;
00025
00026 namespace Dyninst {
00027
00028 struct OperandParser {
00029 typedef qi::rule<string::const_iterator, ArgTree::ptr(), ascii::space_type> ArgTreeRule;
00030
00031 std::map<std::string, Dyninst::MachRegister> register_names;
00032 ArgTree::ptr getReg(std::string name);
00033
00034 ArgTree::ptr newConstant(const signed long &i);
00035 ArgTree::ptr newDeref(ArgTree::ptr p);
00036 ArgTree::ptr newSegment(ArgTree::ptr a, ArgTree::ptr b);
00037 ArgTree::ptr identity(ArgTree::ptr p);
00038 ArgTree::ptr newDerefAdd(ArgTree::ptr a, ArgTree::ptr b);
00039 ArgTree::ptr newBaseIndexScale(ArgTree::ptr base, ArgTree::ptr index, int scale);
00040 ArgTree::ptr newAdd(ArgTree::ptr a, ArgTree::ptr b);
00041 ArgTree::ptr newRegister(const std::vector<char> &v);
00042 };
00043
00044 struct x86OperandParser : public qi::grammar<std::string::const_iterator, ArgTree::ptr(), ascii::space_type>,
00045 public OperandParser
00046 {
00047 ArgTreeRule operand, shex, reg, mem_modrm_nobase, mem_modrm, modrm;
00048
00049 void createRegisterNames(Dyninst::Architecture arch);
00050 x86OperandParser(Dyninst::Architecture arch);
00051 };
00052
00053 struct ppcOperandParser : public qi::grammar<std::string::const_iterator, ArgTree::ptr(), ascii::space_type>,
00054 public OperandParser
00055 {
00056 ArgTreeRule operand, shex, num, reg;
00057
00058 void createRegisterNames(Dyninst::Architecture arch);
00059 ppcOperandParser(Dyninst::Architecture arch);
00060 };
00061
00062 }
00063
00064 using namespace Dyninst;
00065
00066 map<Elf_X *, SystemTapEntries *> SystemTapEntries::all_entries;
00067 SystemTapEntries *SystemTapEntries::createSystemTapEntries(Elf_X *file_)
00068 {
00069 map<Elf_X *, SystemTapEntries *>::iterator i = all_entries.find(file_);
00070 if (i != all_entries.end())
00071 return i->second;
00072
00073 SystemTapEntries *st = new SystemTapEntries(file_);
00074 bool result = st->parse();
00075 if (!result) {
00076 delete st;
00077 st = NULL;
00078 }
00079
00080 all_entries.insert(make_pair(file_, st));
00081 return st;
00082 }
00083
00084 SystemTapEntries::SystemTapEntries(Elf_X *file_) :
00085 file(file_)
00086 {
00087 }
00088
00089 SystemTapEntries::~SystemTapEntries()
00090 {
00091 }
00092
00093 bool SystemTapEntries::parse() {
00094 switch (file->e_machine()) {
00095 case EM_386:
00096 arch = Arch_x86;
00097 break;
00098 case EM_X86_64:
00099 arch = Arch_x86_64;
00100 break;
00101 case EM_PPC:
00102 arch = Arch_ppc32;
00103 break;
00104 case EM_PPC64:
00105 arch = Arch_ppc64;
00106 break;
00107 }
00108 word_size = getArchAddressWidth(arch);
00109
00110 return parseAllNotes();
00111 }
00112
00113 bool SystemTapEntries::parseAllNotes()
00114 {
00115 for (unsigned short i = 0; i < file->e_shnum(); i++) {
00116 Elf_X_Shdr &shdr = file->get_shdr(i);
00117 if (!shdr.isValid())
00118 continue;
00119 if (shdr.sh_type() != SHT_NOTE)
00120 continue;
00121
00122 bool result = parseNotes((const void *) shdr.get_data().d_buf(), shdr.sh_size());
00123 if (!result)
00124 return false;
00125 }
00126
00127 return true;
00128 }
00129
// Note type and owner name that identify a SystemTap SDT probe note.
// If the underscore-prefixed macros are already defined, reuse their values;
// otherwise fall back to the built-in defaults. Either way the unprefixed
// names used by parseNotes() end up defined.
#if !defined(SDT_NOTE_TYPE)
#  if defined(_SDT_NOTE_TYPE)
#    define SDT_NOTE_TYPE _SDT_NOTE_TYPE
#  else
#    define SDT_NOTE_TYPE 3
#  endif
#endif
#if !defined(SDT_NOTE_NAME)
#  if defined(_SDT_NOTE_NAME)
#    define SDT_NOTE_NAME _SDT_NOTE_NAME
#  else
#    define SDT_NOTE_NAME "stapsdt"
#  endif
#endif
00136
00137 bool SystemTapEntries::parseNotes(const void *notes, size_t size)
00138 {
00139 unsigned i=0;
00140 const unsigned char *buffer = (const unsigned char *) notes;
00141 bool parseError = false;
00142
00143 while (i < size) {
00144 Entry e;
00145
00146
00147
00148 Elf32_Nhdr *header = (Elf32_Nhdr *) (buffer+i);
00149 i += sizeof(Elf32_Nhdr);
00150
00151
00152 const char *note_name = (const char *) (buffer + i);
00153 i += header->n_namesz;
00154 while (i % 4 != 0) i++;
00155
00156 if (header->n_type != SDT_NOTE_TYPE) {
00157 i += header->n_descsz;
00158 continue;
00159 }
00160 if (strcmp(SDT_NOTE_NAME, note_name) != 0) {
00161 i += header->n_descsz;
00162 continue;
00163 }
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175 bool result = readAddr(buffer, size, i, e.addr);
00176 if (!result) {
00177 parseError = true;
00178 break;
00179 }
00180 result = readAddr(buffer, size, i, e.base_addr);
00181 if (!result) {
00182 parseError = true;
00183 break;
00184 }
00185 result = readAddr(buffer, size, i, e.semaphore_addr);
00186 if (!result) {
00187 parseError = true;
00188 break;
00189 }
00190 result = readString(buffer, size, i, e.provider);
00191 if (!result) {
00192 parseError = true;
00193 break;
00194 }
00195 std::string name;
00196 result = readString(buffer, size, i, name);
00197 if (!result) {
00198 parseError = true;
00199 break;
00200 }
00201 std::string args;
00202 result = readString(buffer, size, i, args);
00203 if (!result) {
00204 parseError = true;
00205 break;
00206 }
00207 while (i % 4 != 0) i++;
00208
00209 result = parseOperands(args, e);
00210 if (!result) {
00211 parseError = true;
00212 break;
00213 }
00214
00215 Entry *entry = new Entry(e);
00216 name_to_entry.insert(make_pair(name, entry));
00217 }
00218
00219 return !parseError;
00220 }
00221
00222 bool SystemTapEntries::readAddr(const unsigned char *buffer, size_t bsize, unsigned &offset,
00223 Dyninst::Address &result, unsigned int read_size)
00224 {
00225 if (!read_size) {
00226 read_size = word_size;
00227 }
00228 if (offset + read_size > bsize)
00229 return false;
00230
00231 if (read_size == 4)
00232 result = *((uint32_t *) (buffer + offset));
00233 else if (read_size == 8)
00234 result = *((uint64_t *) (buffer + offset));
00235 offset += read_size;
00236
00237 return true;
00238 }
00239
00240 bool SystemTapEntries::readString(const unsigned char *buffer, size_t bsize, unsigned &offset,
00241 std::string &result)
00242 {
00243 unsigned int start = offset;
00244 unsigned int end = start;
00245
00246 if (start >= bsize)
00247 return false;
00248
00249 while (buffer[end] != '\0' && end < bsize) end++;
00250 result = std::string(((const char *) buffer)+start, end-start);
00251 offset = end+1;
00252 return true;
00253 }
00254
00255 bool SystemTapEntries::parseOperands(std::string ops, Entry &entry)
00256 {
00257 if (ops.empty() || ops == string(":")) {
00258
00259 return true;
00260 }
00261
00262 typedef boost::tokenizer<boost::char_separator<char> > tok_t;
00263 boost::char_separator<char> sep(" ");
00264 tok_t tokens(ops, sep);
00265
00266 for (tok_t::iterator i = tokens.begin(); i != tokens.end(); i++) {
00267 Arg result;
00268 string arg = *i;
00269
00270 string operand = arg;
00271 result.arg_size = 0;
00272 result.is_arg_signed = false;
00273
00274
00275 size_t at_pos = arg.find('@');
00276 if (at_pos != string::npos) {
00277 string type_info_str(operand, 0, at_pos);
00278 if (!type_info_str.empty()) {
00279 char *endptr = NULL;
00280 signed long type_info = strtol(type_info_str.c_str(), &endptr, 10);
00281 if (*endptr == '\0') {
00282
00283 result.arg_size = abs(type_info);
00284 result.is_arg_signed = (type_info < 0);
00285 operand = string(arg, at_pos+1);
00286 }
00287 }
00288 }
00289 bool bres = true;
00290 if (arch == Arch_x86 || arch == Arch_x86_64)
00291 bres = parseOperand_x86(operand, result);
00292 else if (arch == Arch_ppc32 || arch == Arch_ppc64)
00293 bres = parseOperand_ppc(operand, result);
00294
00295 if (!bres) {
00296 return false;
00297 }
00298 entry.args.push_back(result);
00299 }
00300
00301 return true;
00302 }
00303
00304 x86OperandParser *SystemTapEntries::x86_parser = NULL;
00305 x86OperandParser *SystemTapEntries::x86_64_parser = NULL;
00306 ppcOperandParser *SystemTapEntries::ppc32_parser = NULL;
00307 ppcOperandParser *SystemTapEntries::ppc64_parser = NULL;
00308
00309 bool SystemTapEntries::parseOperand_x86(std::string op, Arg &arg)
00310 {
00311 x86OperandParser* &parser = (arch == Arch_x86) ? x86_parser : x86_64_parser;
00312 if (!parser)
00313 parser = new x86OperandParser(arch);
00314
00315 using boost::spirit::ascii::space;
00316 std::string::const_iterator iter = op.begin();
00317 std::string::const_iterator end = op.end();
00318 bool result = phrase_parse(iter, end, *parser, space, arg.tree);
00319 if (!result || iter != end) {
00320
00321 return false;
00322 }
00323 return true;
00324 }
00325
00326 bool SystemTapEntries::parseOperand_ppc(std::string op, Arg &arg)
00327 {
00328 ppcOperandParser* &parser = (arch == Arch_ppc32) ? ppc32_parser : ppc64_parser;
00329 if (!parser)
00330 parser = new ppcOperandParser(arch);
00331
00332 using boost::spirit::ascii::space;
00333 std::string::const_iterator iter = op.begin();
00334 std::string::const_iterator end = op.end();
00335 bool result = phrase_parse(iter, end, *parser, space, arg.tree);
00336 if (!result || iter != end) {
00337
00338 return false;
00339 }
00340 return true;
00341 }
00342
00343 ArgTree::ptr OperandParser::getReg(std::string name) {
00344 std::map<std::string, Dyninst::MachRegister>::iterator i = register_names.find(name);
00345 if (i == register_names.end())
00346 return ArgTree::createRegister(Dyninst::InvalidReg);
00347 return ArgTree::createRegister(i->second);
00348 }
00349
00350
00351 ArgTree::ptr OperandParser::newConstant(const signed long &i) {
00352 return ArgTree::createConstant(i);
00353 }
00354
00355 ArgTree::ptr OperandParser::newDeref(ArgTree::ptr p) {
00356 return ArgTree::createDeref(p);
00357 }
00358
00359 ArgTree::ptr OperandParser::newSegment(ArgTree::ptr a, ArgTree::ptr b) {
00360 return ArgTree::createSegment(a, b);
00361 }
00362
00363 ArgTree::ptr OperandParser::identity(ArgTree::ptr p) {
00364 return p;
00365 }
00366
00367 ArgTree::ptr OperandParser::newDerefAdd(ArgTree::ptr a, ArgTree::ptr b) {
00368 return ArgTree::createDeref(ArgTree::createAdd(a, b));
00369 }
00370
00371 ArgTree::ptr OperandParser::newBaseIndexScale(ArgTree::ptr base, ArgTree::ptr index, int scale) {
00372 return ArgTree::createAdd(base, ArgTree::createMultiply(index, ArgTree::createConstant(scale)));
00373 }
00374
00375 ArgTree::ptr OperandParser::newAdd(ArgTree::ptr a, ArgTree::ptr b) {
00376 return ArgTree::createAdd(a, b);
00377 }
00378
00379 ArgTree::ptr OperandParser::newRegister(const std::vector<char> &v) {
00380 std::string s;
00381 for (std::vector<char>::const_iterator i = v.begin(); i != v.end(); i++) s += *i;
00382 return getReg(s);
00383 }
00384
00385 x86OperandParser::x86OperandParser(Dyninst::Architecture arch) :
00386 x86OperandParser::base_type(operand)
00387 {
00388 using namespace qi::labels;
00389 using qi::uint_;
00390 using qi::int_;
00391 using qi::lit;
00392 using qi::hex;
00393 using qi::alnum;
00394
00395 using phoenix::construct;
00396 using phoenix::val;
00397 using boost::phoenix::ref;
00398
00399 createRegisterNames(arch);
00400
00401 shex =
00402 lit("0x") >> hex [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00403 | lit("-0x") >> hex [qi::_val = phoenix::bind(&OperandParser::newConstant, this, -1*qi::_1)]
00404 | int_ [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00405 ;
00406
00407 reg = '%' >> (+alnum) [qi::_val = phoenix::bind(&OperandParser::newRegister, this, qi::_1)];
00408
00409 mem_modrm_nobase =
00410 ('(' >> reg >> ')') [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00411 | ( '(' >> reg >> ',' >> reg >> ',' >> uint_ >> ')' )
00412 [qi::_val = phoenix::bind(&OperandParser::newBaseIndexScale, this, qi::_1, qi::_2, qi::_3)]
00413 ;
00414
00415 mem_modrm =
00416 (shex >> mem_modrm_nobase) [qi::_val = phoenix::bind(&OperandParser::newDerefAdd, this, qi::_1, qi::_2)]
00417 | mem_modrm_nobase [qi::_val = phoenix::bind(&OperandParser::newDeref, this, qi::_1)]
00418 | shex [qi::_val = phoenix::bind(&OperandParser::newDeref, this, qi::_1)]
00419
00420 ;
00421
00422 modrm =
00423 reg [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00424 | mem_modrm [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00425 | '$' >> shex [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00426 ;
00427
00428 operand =
00429 modrm [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00430 | (reg >> ':' >> modrm) [qi::_val = phoenix::bind(&OperandParser::newSegment, this, qi::_1, qi::_2)]
00431 ;
00432 }
00433
00434 void x86OperandParser::createRegisterNames(Dyninst::Architecture arch) {
00435 Dyninst::MachRegister::NameMap::iterator i = Dyninst::MachRegister::names()->begin();
00436 for (; i != Dyninst::MachRegister::names()->end(); i++) {
00437 Dyninst::MachRegister reg(i->first);
00438
00439 if (reg.getArchitecture() != arch) {
00440 continue;
00441 }
00442 unsigned int gpr_code = (arch == Dyninst::Arch_x86 ? Dyninst::x86::GPR : Dyninst::x86_64::GPR);
00443 unsigned int seg_code = (arch == Dyninst::Arch_x86 ? Dyninst::x86::SEG : Dyninst::x86_64::SEG);
00444 if (reg.regClass() != gpr_code &&
00445 reg.regClass() != seg_code &&
00446 !reg.isPC()) {
00447 continue;
00448 }
00449 std::string full_reg_name = reg.name();
00450 size_t pos = full_reg_name.find("::");
00451 std::string reg_name = std::string(full_reg_name, pos+2);
00452 register_names[reg_name] = reg;
00453 }
00454 }
00455
00456
00457 ppcOperandParser::ppcOperandParser(Dyninst::Architecture arch) :
00458 ppcOperandParser::base_type(operand)
00459 {
00460 using namespace qi::labels;
00461 using qi::uint_;
00462 using qi::int_;
00463 using qi::lit;
00464 using qi::hex;
00465 using qi::alnum;
00466
00467 using phoenix::construct;
00468 using phoenix::val;
00469 using boost::phoenix::ref;
00470
00471 createRegisterNames(arch);
00472
00473 shex =
00474 lit("0x") >> hex [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00475 | lit("-0x") >> hex [qi::_val = phoenix::bind(&OperandParser::newConstant, this, -1*qi::_1)]
00476 ;
00477
00478 num = int_ [qi::_val = phoenix::bind(&OperandParser::newConstant, this, qi::_1)]
00479 ;
00480
00481 reg = 'r' >> (+alnum) [qi::_val = phoenix::bind(&OperandParser::newRegister, this, qi::_1)];
00482 ;
00483
00484 operand =
00485 reg [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00486 | (num >> lit("(") >> reg >> lit(")"))
00487 [qi::_val = phoenix::bind(&OperandParser::newDerefAdd, this, qi::_1, qi::_2)]
00488 | shex [qi::_val = phoenix::bind(&OperandParser::identity, this, qi::_1)]
00489 ;
00490 }
00491
00492 void ppcOperandParser::createRegisterNames(Dyninst::Architecture arch)
00493 {
00494 Dyninst::MachRegister::NameMap::iterator i = Dyninst::MachRegister::names()->begin();
00495 for (; i != Dyninst::MachRegister::names()->end(); i++) {
00496 Dyninst::MachRegister reg(i->first);
00497
00498 if (reg.getArchitecture() != arch) {
00499 continue;
00500 }
00501 unsigned int gpr_code = (arch == Dyninst::Arch_ppc32 ? Dyninst::ppc32::GPR : Dyninst::ppc64::GPR);
00502 if (reg.regClass() != gpr_code) {
00503 continue;
00504 }
00505 std::string full_reg_name = reg.name();
00506 size_t pos = full_reg_name.find("::r");
00507 std::string reg_name = std::string(full_reg_name, pos+3);
00508 register_names[reg_name] = reg;
00509 }
00510 }
00511
00512 void ArgTree::print(FILE *f)
00513 {
00514 switch (op_type) {
00515 case Register:
00516 fprintf(f, "%%%s", Dyninst::MachRegister(op_data.reg).name().c_str());
00517 break;
00518 case Constant:
00519 fprintf(f, "%ld", op_data.val);
00520 break;
00521 case Add:
00522 fprintf(f, "(");
00523 lchild->print(f);
00524 fprintf(f, " + ");
00525 rchild->print(f);
00526 fprintf(f, ")");
00527 break;
00528 case Multiply:
00529 fprintf(f, "(");
00530 lchild->print(f);
00531 fprintf(f, " * ");
00532 rchild->print(f);
00533 fprintf(f, ")");
00534 break;
00535 case Dereference:
00536 fprintf(f, "*(");
00537 lchild->print(f);
00538 fprintf(f, ")");
00539 break;
00540 case Segment:
00541 lchild->print(f);
00542 fprintf(f, ":");
00543 rchild->print(f);
00544 break;
00545 }
00546 }
00547
00548 ArgTree::ptr ArgTree::createConstant(const signed long &v) {
00549 ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00550 ret->op_type = Constant;
00551 ret->op_data.val = v;
00552 return ret;
00553 }
00554
00555 ArgTree::ptr ArgTree::createRegister(Dyninst::MachRegister r) {
00556 ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00557 ret->op_type = Register;
00558 ret->op_data.reg = r;
00559 return ret;
00560 }
00561
00562 ArgTree::ptr ArgTree::createDeref(ArgTree::ptr sub) {
00563 ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00564 ret->op_type = Dereference;
00565 ret->op_data.val = 0;
00566 ret->lchild = sub;
00567 return ret;
00568 }
00569
00570 ArgTree::ptr ArgTree::createSegment(ArgTree::ptr l, ArgTree::ptr r) {
00571 ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00572 ret->op_type = Segment;
00573 ret->op_data.val = 0;
00574 ret->lchild = l;
00575 ret->rchild = r;
00576 return ret;
00577 }
00578
00579 ArgTree::ptr ArgTree::createAdd(ArgTree::ptr l, ArgTree::ptr r) {
00580 ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00581 ret->op_type = Add;
00582 ret->op_data.val = 0;
00583 ret->lchild = l;
00584 ret->rchild = r;
00585 return ret;
00586 }
00587
00588 ArgTree::ptr ArgTree::createMultiply(ArgTree::ptr l, ArgTree::ptr r) {
00589 ArgTree::ptr ret = ArgTree::ptr(new ArgTree);
00590 ret->op_type = Multiply;
00591 ret->op_data.val = 0;
00592 ret->lchild = l;
00593 ret->rchild = r;
00594 return ret;
00595 }
00596