arch-x86.h

Go to the documentation of this file.
00001 /*
00002  * See the dyninst/COPYRIGHT file for copyright information.
00003  * 
00004  * We provide the Paradyn Tools (below described as "Paradyn")
00005  * on an AS IS basis, and do not warrant its validity or performance.
00006  * We reserve the right to update, modify, or discontinue this
00007  * software at any time.  We shall have no obligation to supply such
00008  * updates or modifications or any other form of support to you.
00009  * 
00010  * By your use of Paradyn, you understand and agree that we (or any
00011  * other person or entity with proprietary rights in Paradyn) are
00012  * under no obligation to provide either maintenance services,
00013  * update services, notices of latent defects, or correction of
00014  * defects for Paradyn.
00015  * 
00016  * This library is free software; you can redistribute it and/or
00017  * modify it under the terms of the GNU Lesser General Public
00018  * License as published by the Free Software Foundation; either
00019  * version 2.1 of the License, or (at your option) any later version.
00020  * 
00021  * This library is distributed in the hope that it will be useful,
00022  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00023  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00024  * Lesser General Public License for more details.
00025  * 
00026  * You should have received a copy of the GNU Lesser General Public
00027  * License along with this library; if not, write to the Free Software
00028  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00029  */
00030 
00031 // $Id: arch-x86.h,v 1.67 2008/10/28 18:42:39 bernat Exp $
00032 // x86 instruction declarations
00033 
00034 #ifndef _ARCH_X86_H
00035 #define _ARCH_X86_H
00036 
00037 #include "common/h/Types.h"
00038 #include <stdio.h>
00039 #include <common/h/Vector.h>
00040 #include <set>
00041 #include <map>
00042 #include <vector>
00043 #include "dyn_regs.h"
00044 #include "entryIDs.h"
00045 
00046 #include "common/h/ia32_locations.h"
00047 
00048 #if defined(i386_unknown_nt4_0)
00049 // disable VC++ warning C4800: (performance warning)
00050 // forcing 'unsigned int' value to bool 'true' or 'false'
00051 #pragma warning (disable : 4800)
00052 #endif
00053 
00054 namespace NS_x86 {
00055 
00056 /* operand types */
00057 typedef char byte_t;   /* a byte operand */
00058 typedef short word_t;  /* a word (16-bit) operand */
00059 typedef int dword_t;   /* a double word (32-bit) operand */
00060 
00061 // The general machine registers.
00062 // These values are taken from the Pentium manual and CANNOT be changed.
00063 
00064 // 32-bit
00065 #define REGNUM_EAX 0
00066 #define REGNUM_ECX 1
00067 #define REGNUM_EDX 2
00068 #define REGNUM_EBX 3
00069 #define REGNUM_ESP 4
00070 #define REGNUM_EBP 5
00071 #define REGNUM_ESI 6
00072 #define REGNUM_EDI 7
00073 
// 64-bit
// Register numbering used for AMD64. The sixteen general purpose
// registers occupy 0-15 (same order as the 32-bit REGNUM_E* values for
// the first eight); every following enumerator simply takes the next
// consecutive value.
enum AMD64_REG_NUMBERS {
    // general purpose registers, 0-15
    REGNUM_RAX = 0, REGNUM_RCX, REGNUM_RDX, REGNUM_RBX,
    REGNUM_RSP,     REGNUM_RBP, REGNUM_RSI, REGNUM_RDI,
    REGNUM_R8,      REGNUM_R9,  REGNUM_R10, REGNUM_R11,
    REGNUM_R12,     REGNUM_R13, REGNUM_R14, REGNUM_R15,

    REGNUM_DUMMYFPR,

    // individual flags
    REGNUM_OF, REGNUM_SF, REGNUM_ZF, REGNUM_AF, REGNUM_PF, REGNUM_CF,
    REGNUM_TF, REGNUM_IF, REGNUM_DF, REGNUM_NT, REGNUM_RF,

    // MMX registers
    REGNUM_MM0, REGNUM_MM1, REGNUM_MM2, REGNUM_MM3,
    REGNUM_MM4, REGNUM_MM5, REGNUM_MM6, REGNUM_MM7,

    // XMM registers
    REGNUM_XMM0,  REGNUM_XMM1,  REGNUM_XMM2,  REGNUM_XMM3,
    REGNUM_XMM4,  REGNUM_XMM5,  REGNUM_XMM6,  REGNUM_XMM7,
    REGNUM_XMM8,  REGNUM_XMM9,  REGNUM_XMM10, REGNUM_XMM11,
    REGNUM_XMM12, REGNUM_XMM13, REGNUM_XMM14, REGNUM_XMM15,

    REGNUM_EFLAGS,
    REGNUM_IGNORED
};
00132 
00133 #if defined(arch_x86_64)
00134 #define maxGPR 16
00135 #else
00136 #define maxGPR 8
00137 #endif 
00138 
00139 #define READ_OP 0
00140 #define WRITE_OP 1
00141 
00142 /* operand sizes */
00143 #define byteSzB (1)    /* size of a byte operand */
00144 #define wordSzB (2)    /* size of a word operand */
00145 #define dwordSzB (4)   /* size of a dword operand */
00146 #define qwordSzB (8)   /* size of a qword operand */
00147 #define dqwordSzB (16)   /* size of a double qword (oword) operand */
00148 
00149 /* The following values are or'ed together to form an instruction type descriptor */
00150 /* the instruction types of interest */
00151 #define IS_CALL (1<<1)   /* call instruction */
00152 #define IS_RET  (1<<2)   /* near return instruction */
00153 #define IS_RETF (1<<3)   /* far return instruction */
00154 #define IS_JUMP (1<<4)   /* jump instruction */
00155 #define IS_JCC  (1<<5)   /* conditional jump instruction */
00156 #define ILLEGAL (1<<6)   /* illegal instruction */
00157 #define PRVLGD  (1<<7)   /* privileged */
00158 #define IS_RETC (1<<8)   /* return and pop bytes off of stack*/
00159 #define IS_NOP  (1<<9)   /* Nop, Lea--lea is only sometime a return, be sure to double check */
00160 
00161 /* addressing modes for calls and jumps */
00162 #define REL_B   (1<<10)  /* relative address, byte offset */
00163 #define REL_W   (1<<11)  /* relative address, word offset */
00164 #define REL_D   (1<<12)  /* relative address, dword offset */
00165 #define REL_X   (1<<13)  /* relative address, word or dword offset */
00166 #define INDIR   (1<<14)  /* indirect (register or memory) address */
00167 #define PTR_WW  (1<<15)  /* 4-byte pointer */
00168 #define PTR_WD  (1<<16)  /* 6-byte pointer */
00169 #define PTR_WX  (1<<17)  /* 4 or 6-byte pointer */
00170 #define REL_D_DATA (1<<18) /* AMD64 RIP-relative data addressing */
00171 
00172 /* prefixes */
00173 #define PREFIX_INST   (1<<20) /* instruction prefix */
00174 #define PREFIX_SEG    (1<<21) /* segment override prefix */
00175 #define PREFIX_OPR    (1<<22) /* operand size override */
00176 #define PREFIX_ADDR   (1<<23) /* address size override */
00177 #define PREFIX_REX    (1<<24) /* AMD64 REX prefix */
00178 #define PREFIX_OPCODE (1<<25) /* prefix is part of opcode (SSE) */
00179 
00180 /* end of instruction type descriptor values */
00181 
00182 
00183 /* opcodes of some one byte opcode instructions */
00184 /* ADD */
00185 #define ADD_EB_GB (0x00)
00186 #define ADD_EV_GV (0x01)
00187 #define ADD_GB_EB (0x02)
00188 #define ADD_GV_EV (0x03)
00189 #define ADD_AL_LB (0x04)
00190 #define ADD_RAX_LZ (0x05)
00191 
00192 #define PUSHES    (0x06) /* Invalid in 64 bit mode */
00193 #define POPES     (0x07) /* Invalid in 64 bit mode */
00194 
00195 /* OR */
00196 #define OR_EB_GB (0x08)
00197 #define OR_EV_GV (0x09)
00198 #define OR_GB_EB (0x0A)
00199 #define OR_GV_EV (0x0B)
00200 #define OR_AL_LB (0x0C)
00201 #define OR_RAX_LZ (0x0D)
00202 
00203 #define PUSHCS    (0x0E) /* Invalid in 64 bit mode */
00204 #define TWO_BYTE_OPCODE (0x0F)
00205 
00206 /* ADC */
00207 #define ADC_EB_GB (0x10)
00208 #define ADC_EV_GV (0x11)
00209 #define ADC_GB_EB (0x12)
00210 #define ADC_GV_EV (0x13)
00211 #define ADC_AL_LB (0x14)
00212 #define ADC_RAX_LZ (0x15)
00213 
00214 #define PUSHSS    (0x16) /* Invalid in 64 bit mode */
00215 #define POPSS     (0x17) /* Invalid in 64 bit mode */
00216 
00217 /* SBB */
00218 #define SBB_EB_GB (0x18)
00219 #define SBB_EV_GV (0x19)
00220 #define SBB_GB_EB (0x1A)
00221 #define SBB_GV_EV (0x1B)
00222 #define SBB_AL_LB (0x1C)
00223 #define SBB_RAX_LZ (0x1D)
00224 
00225 #define PUSH_DS  (0x1E) /* Invalid in 64 bit mode */
00226 #define POP_DS   (0X1F) /* Invalid in 64 bit mode */
00227 
00228 /* AND */
00229 #define AND_EB_GB (0x20)
00230 #define AND_EV_GV (0x21)
00231 #define AND_GB_EB (0x22)
00232 #define AND_GV_EV (0x23)
00233 #define AND_AL_LB (0x24)
00234 #define AND_RAX_LZ (0x25)
00235 
00236 #define SEG_ES (0x26) /* Null prefix in 64-bit mode */
00237 #define DAA    (0x27) /* Invalid in 64-bit mode */
00238 
00239 /* SUB */
00240 #define SUB_EB_GB (0x28)
00241 #define SUB_EV_GV (0x29)
00242 #define SUB_GB_EB (0x2A)
00243 #define SUB_GV_EV (0x2B)
00244 #define SUB_AL_LB (0x2C)
00245 #define SUB_RAX_LZ (0x2D)
00246 
00247 //(0x2E)
00248 //   (0x2F)
00249 
00250 /* XOR */
00251 #define XOR_EB_GB (0x30)
00252 #define XOR_EV_GV (0x31)
00253 #define XOR_GB_EB (0x32)
00254 #define XOR_GV_EV (0x33)
00255 #define XOR_AL_LB (0x34)
00256 #define XOR_RAX_LZ (0x35)
00257 
00258 #define XOR_RM16_R16 (0x31)
00259 #define XOR_RM32_R32 (0x31)
00260 #define XOR_R8_RM8 (0x32)
00261 #define XOR_R16_RM16 (0x33)
00262 #define XOR_R32_RM32 (0x33)
00263 
00264 #define SEG_SS (0x36) /* Null prefix in 64 bit mode */
00265 #define AAA (0x37)    /* Invalid in 64-bit mode */
00266 
00267 
00268 /* CMP */
00269 #define CMP_EB_GB (0x38)
00270 #define CMP_EV_GV (0x39)
00271 #define CMP_GB_EB (0x3A)
00272 #define CMP_GV_EV (0x3B)
00273 #define CMP_AL_LB (0x3C)
00274 #define CMP_RAX_LZ (0x3D)
00275 
00276 #define TEST_EV_GV (0x85)
00277 //   (0x3E)
00278 //   (0x3F)
00279 
00280 /* INC - REX Prefixes in 64 bit mode*/
00281 #define INC_EAX  (0x40)
00282 #define INC_ECX  (0x41)
00283 #define INC_EDX  (0x42)
00284 #define INC_EBX  (0x43)
00285 #define INC_ESP  (0x44)
00286 #define INC_EBP  (0x45)
00287 #define INC_ESI  (0x46)
00288 #define INC_EDI  (0x47)
00289 
/* DEC - one-byte opcodes 0x48..0x4F (these bytes are REX prefixes in 64 bit mode).
 * The values for EDX..EDI previously continued at 0x50..0x55, which are the
 * PUSH opcodes defined right below; hexadecimal 0x49 is followed by 0x4A. */
#define DEC_EAX  (0x48)
#define DEC_ECX  (0x49)
#define DEC_EDX  (0x4A)
#define DEC_EBX  (0x4B)
#define DEC_ESP  (0x4C)
#define DEC_EBP  (0x4D)
#define DEC_ESI  (0x4E)
#define DEC_EDI  (0x4F)
00299 
00300 /* PUSH */
00301 #define PUSHEAX  (0x50)
00302 #define PUSHECX  (0x51)
00303 #define PUSHEDX  (0x52)
00304 #define PUSHEBX  (0x53)
00305 #define PUSHESP  (0x54)
00306 #define PUSHEBP  (0x55)
00307 #define PUSHESI  (0x56)
00308 #define PUSHEDI  (0x57)
00309 
/* POP - one-byte opcodes 0x58..0x5F, one per register in REGNUM_E* order */
#define POP_EAX  (0x58)
#define POP_ECX  (0x59)
#define POP_EDX  (0x5A)
#define POP_EBX  (0x5b)
#define POP_ESP  (0x5c)
#define POP_EBP  (0x5d)
#define POP_ESI  (0x5e)
#define POP_EBI  POP_ESI  /* historical misspelling of POP_ESI, kept for existing users */
#define POP_EDI  (0x5f)
00319 
00320 
00321 #define PUSHAD   (0x60)
00322 #define POPAD    (0x61)
00323 
00324 
00325 
00326 
00327 #define JE_R8    (0x74)
00328 #define JNE_R8   (0x75)
00329 #define JL_R8    (0x7C)
00330 #define JLE_R8   (0x7E)
00331 #define JG_R8    (0x7F)
00332 #define JGE_R8   (0x7D)
00333 
00334 
00335 #define MOVREGMEM_REG (0x8b) 
00336 #define MOV_R8_TO_RM8 (0x88)     //move r8 to r/m8
00337 #define MOV_R16_TO_RM16 (0x89)   //move r16 to r/m16
00338 #define MOV_R32_TO_RM32 (0x89)   //move r32 to r/m32
00339 #define MOV_RM8_TO_R8 (0x8A)
00340 #define MOV_RM16_TO_R16 (0x8b)
00341 #define MOV_RM32_TO_R32 (0x8b)
00342 
00343 
00344 
00345 #define NOP      (0x90)
00346 #define PUSHFD   (0x9C)
00347 #define POPFD    (0x9D)
00348 
00349 
00350 #define JCXZ     (0xE3)
00351 
00352 
00353 
00354 
00355 
00356 
00357 #define FSAVE    (0x9BDD)
00358 #define FSAVE_OP (6)
00359 
00360 #define FRSTOR   (0xDD)
00361 #define FRSTOR_OP (4)
00362 
00363 const unsigned char SYSCALL[] = {0x0F, 0x05};
00364 
00365 /* limits */
00366 #define MIN_IMM8 (-128)
00367 #define MAX_IMM8 (127)
00368 #define MIN_IMM16 (-32768)
00369 #define MAX_IMM16 (32767)
00370 
00371 // Size of floating point information saved by FSAVE
00372 #define FSAVE_STATE_SIZE 108
00373 
00374 // Prefix groups
00375 enum {
00376   RepGroup = 0
00377 };
00378 
00379 
00380 #ifndef PREFIX_LOCK
00381 #define PREFIX_LOCK   (unsigned char)(0xF0)
00382 #define PREFIX_REPNZ  (unsigned char)(0xF2)
00383 #define PREFIX_REP    (unsigned char)(0xF3)
00384 
00385 #define PREFIX_SEGCS  (unsigned char)(0x2E)
00386 #define PREFIX_SEGSS  (unsigned char)(0x36)
00387 #define PREFIX_SEGDS  (unsigned char)(0x3E)
00388 #define PREFIX_SEGES  (unsigned char)(0x26)
00389 #define PREFIX_SEGFS  (unsigned char)(0x64)
00390 #define PREFIX_SEGGS  (unsigned char)(0x65)
00391 
00392 #define PREFIX_BRANCH0 (unsigned char)(0x2E)
00393 #define PREFIX_BRANCH1 (unsigned char)(0x3E)
00394 
00395 #define PREFIX_SZOPER  (unsigned char)(0x66)
00396 #define PREFIX_SZADDR  (unsigned char)(0x67)
00397 #endif
00398 
00399 COMMON_EXPORT void ia32_set_mode_64(bool mode);
00400 COMMON_EXPORT bool ia32_is_mode_64();
00401 
00402 // addressing methods (see appendix A-2)
00403 // I've added am_reg (for registers implicitly encoded in the instruction),
00404 // and am_stackX for stack operands [this is kind of messy since there are actually two operands:
00405 // the stack byte/word/dword and the (E)SP register itself - but it is better than naught]
00406 // added: am_reg, am_stack, am_allgprs
00407 // ADDED: am_ImplImm for implicit immediates
00408 // ADDED: am_RM, am_UM,
// Addressing-method codes. Values start at 1 and increase by one per
// enumerator; the trailing number on each row is the value of its last entry.
enum {
    am_A = 1, am_C, am_D, am_E, am_F, am_G, am_I, am_J, am_M, am_O,   // 10
    am_P, am_Q, am_R, am_S, am_T, am_U, am_UM, am_V, am_W, am_X,      // 20
    am_Y, am_reg,                                                     // 22
    am_stackH, am_stackP,   // pusH and poP produce different addresses
    am_allgprs, am_VR, am_tworeghack, am_ImplImm, am_RM               // 29
};
00412 
00413 // operand types - idem, but I invented quite a few to make implicit operands explicit.
00414 // ADDED: op_y
// Operand-type codes, numbered consecutively from 1.
enum {
    op_a = 1, op_b, op_c, op_d, op_dq, op_p, op_pd, op_pi, op_ps,   // 9
    op_q, op_s, op_sd, op_ss, op_si, op_v, op_w, op_y, op_z,        // 18
    op_lea, op_allgprs, op_512,                                     // 21
    op_f, op_dbl, op_14, op_28, op_edxeax, op_ecxebx                // 27
};
00418 
00419 
00420 // tables and pseudotables
// Identifiers of the decoding tables; t_done (99) is the end marker.
enum {
  t_ill = 0,
  t_oneB,
  t_twoB,
  t_threeB,
  t_threeB2,
  t_prefixedSSE,
  t_coprocEsc,
  t_grp,
  t_sse,
  t_sse_bis,
  t_sse_ter,
  t_grpsse,
  t_3dnow,
  t_done = 99
};
00424 
// registers used for memory access
// mR14 follows the naming of its neighbours; MR14 (capital M) is the
// original spelling and is kept as an alias with the same value so that
// existing users keep compiling.
enum { mRAX=0, mRCX, mRDX, mRBX,
       mRSP, mRBP, mRSI, mRDI,
       mR8, mR9, mR10, mR11,
       mR12, mR13, mR14, MR14 = mR14, mR15, mRIP };
00430 
// 32-bit register names for memory access, values 0-7
enum {
    mEAX = 0, mECX = 1, mEDX = 2, mEBX = 3,
    mESP = 4, mEBP = 5, mESI = 6, mEDI = 7
};
00433 
// 16-bit register names for memory access, values 0-7
enum {
    mAX = 0, mCX = 1, mDX = 2, mBX = 3,
    mSP = 4, mBP = 5, mSI = 6, mDI = 7
};
00436 
00437 
00438 // operand semantic - these make explicit all the implicit stuff in the Intel tables
00439 // they are needed for memory access, but may be useful for other things: dataflow etc.
00440 // Instructions that do not deal with memory are not tested, so caveat emptor...
00441 // Also note that the stack is never specified as an operand in Intel tables, so it
00442 // has to be dealt with here.
00443 
// Operand semantics codes. In each name the digit is the operand position
// and the letters after it say whether that operand is Read and/or Written.
enum {
    sNONE     = 0,  // the instruction does something that cannot be classified as read/write (by me)
    s1R       = 1,  // reads one operand, e.g. jumps
    s1W       = 2,  // e.g. lea
    s1RW      = 3,  // one operand read and written, e.g. inc
    s1R2R     = 4,  // reads two operands, e.g. cmp
    s1W2R     = 5,  // second operand read, first operand written (e.g. mov)
    s1RW2R    = 6,  // two operands read, first written (e.g. add)
    s1RW2RW   = 7,  // e.g. xchg
    s1W2R3R   = 8,  // e.g. imul
    s1W2W3R   = 9,  // e.g. les
    s1W2RW3R  = 10, // some mul
    s1R2RW    = 11, // (stack) push
    s1W2RW    = 12, // pop
    s1W2R3RW  = 13, // additional push/pop
    s1RW2R3R  = 14, // shld/shrd
    s1RW2RW3R = 15, // [i]div, cmpxch8b
    s1R2R3R   = 16
}; // should be no more than 2^16 otherwise adjust FPOS below
00462 
00463 
// The three fields of a ModRM byte, as bit-fields of width 2, 3 and 3.
struct modRMByte {
  unsigned int mod : 2;
  unsigned int reg : 3;
  unsigned int rm  : 3;
};
00469 
// The three fields of a SIB byte, as bit-fields of width 2, 3 and 3.
struct sIBByte {
  unsigned int scale : 2;
  unsigned int index : 3;
  unsigned int base  : 3;
};
00475 
00476 
00477 class ia32_prefixes
00478 {
00479   friend bool ia32_decode_prefixes(const unsigned char* addr, ia32_prefixes&,
00480                                    ia32_locations *loc);
00481   friend bool ia32_decode_rex(const unsigned char* addr, ia32_prefixes&,
00482                               ia32_locations *loc);
00483  private:
00484   unsigned int count;
00485   // At most 4 prefixes are allowed for Intel 32-bit CPUs
00486   // There also 4 groups, so this array is 0 if no prefix
00487   // from that group is present, otherwise it contains the
00488   // prefix opcode
00489   // For 64-bit CPUs, an additional REX prefix is possible,
00490   // so this array is extended to 5 elements
00491   unsigned char prfx[5];
00492   unsigned char opcode_prefix;
00493  public:
00494   unsigned int getCount() const { return count; }
00495   unsigned char getPrefix(unsigned char group) const { assert(group <= 4); return prfx[group]; }
00496   bool rexW() const { return prfx[4] & 0x8; }
00497   bool rexR() const { return prfx[4] & 0x4; }
00498   bool rexX() const { return prfx[4] & 0x2; }
00499   bool rexB() const { return prfx[4] & 0x1; }
00500   unsigned char getOpcodePrefix() const { return opcode_prefix; }
00501   unsigned char getAddrSzPrefix() const { return prfx[3]; }
00502   unsigned char getOperSzPrefix() const { return prfx[2]; }
00503 };
00504 
// Helper that tacks a REX bit onto a register number: the result is
// reg + 8 when rex_bit is set and reg unchanged otherwise.
inline int apply_rex_bit(int reg, bool rex_bit)
{
    return rex_bit ? reg + 8 : reg;
}
00513 
00514 //VG(6/20/02): To support Paradyn without forcing it to include BPatch_memoryAccess, we
00515 //             define this IA-32 "specific" class to encapsulate the same info - yuck
00516 
// Description of one memory access made by an instruction. A freshly
// constructed object describes "no access" (is == false); each of the
// set*() members marks the access as present and fills in part of the
// addressing information, leaving the remaining fields untouched.
struct ia32_memacc
{
  bool is;
  bool read;
  bool write;
  bool nt;     // non-temporal, e.g. movntq...
  bool prefetch;

  int addr_size; // size of address in 16-bit words
  long imm;
  int scale;
  int regs[2]; // register encodings (in ISA order): 0-7
               // (E)AX, (E)CX, (E)DX, (E)BX
               // (E)SP, (E)BP, (E)SI, (E)DI
               // -1 is what the constructor stores in both slots

  int size;
  int sizehack;  // register (E)CX or string based
  int prefetchlvl; // prefetch level
  int prefetchstt; // prefetch state (AMD)

  ia32_memacc()
    : is(false),
      read(false),
      write(false),
      nt(false),
      prefetch(false),
      addr_size(2),
      imm(0),
      scale(0),
      size(0),
      sizehack(0),
      prefetchlvl(-1),
      prefetchstt(-1)
  {
    regs[0] = regs[1] = -1;
  }

  // Two registers plus displacement; addr_size is fixed to 1.
  void set16(int reg0, int reg1, long disp)
  {
    regs[0]   = reg0;
    regs[1]   = reg1;
    imm       = disp;
    addr_size = 1;
    is        = true;
  }

  // One register plus displacement; regs[1] and scale are not modified.
  void set(int reg, long disp, int addr_sz)
  {
    regs[0]   = reg;
    imm       = disp;
    addr_size = addr_sz;
    is        = true;
  }

  // Base register, scale, index register and displacement.
  void set_sib(int base, int scal, int indx, long disp, int addr_sz)
  {
    regs[0]   = base;
    regs[1]   = indx;
    scale     = scal;
    imm       = disp;
    addr_size = addr_sz;
    is        = true;
  }

  // One register together with access size and address size.
  void setXY(int reg, int _size, int _addr_size)
  {
    regs[0]   = reg;
    size      = _size;
    addr_size = _addr_size;
    is        = true;
  }

  void print();
};
00582 
// Values for ia32_memacc::sizehack, numbered 1 through 5.
enum sizehacks {
  shREP       = 1,
  shREPECMPS  = 2,
  shREPESCAS  = 3,
  shREPNECMPS = 4,
  shREPNESCAS = 5
};
00590 
00591 
// Condition attached to an instruction. `is` tells whether a condition
// has been recorded; `tttn` holds the value handed to set().
struct ia32_condition
{
  bool is;
  // TODO: add a field/hack for ECX [not needed for CMOVcc, but for Jcc]
  int tttn;

  // tttn starts at -1 so that an object on which set() was never called
  // holds a defined value instead of an indeterminate one.
  ia32_condition() : is(false), tttn(-1) {}

  // Record the condition code and mark the condition as present.
  void set(int _tttn) { is = true; tttn = _tttn; }
};
00601 
00602 bool ia32_decode_prefixes(const unsigned char* addr, ia32_prefixes&,
00603                           ia32_locations *loc = NULL);
00604 
00605 
// One operand descriptor, as given in the Intel book tables.
struct ia32_operand {
  unsigned admet;   // addressing method
  unsigned optype;  // operand type
};
00610 
00611 // An instruction table entry
00612 struct ia32_entry {
00613   COMMON_EXPORT const char* name(ia32_locations* locs = NULL);
00614   COMMON_EXPORT entryID getID(ia32_locations* locs = NULL) const;
00615   // returns true if any flags are read/written, false otherwise
00616   COMMON_EXPORT bool flagsUsed(std::set<Dyninst::MachRegister>& flagsRead, std::set<Dyninst::MachRegister>& flagsWritten,
00617          ia32_locations* locs = NULL);
00618   entryID id;
00619   unsigned int otable;       // which opcode table is next; if t_done it is the current one
00620   unsigned char tabidx;      // at what index to look, 0 if it easy to deduce from opcode
00621   bool hasModRM;             // true if the instruction has a MOD/RM byte
00622   ia32_operand operands[3];  // operand descriptors
00623   unsigned int legacyType;   // legacy type of the instruction (e.g. (IS_CALL | REL_W))
00624   // code to decode memory access - this field should be seen as two 16 bit fields
00625   // the lower half gives operand semantics, e.g. s1RW2R, the upper half is a fXXX hack if needed
00626   // before hating me for this: it takes a LOT less time to add ONE field to ~2000 table lines!
00627   unsigned int opsema;  
00628 };
00629 
00630 using std::vector;
00631 struct flagInfo
00632 {
00633   flagInfo(const vector<Dyninst::MachRegister>& rf, const vector<Dyninst::MachRegister>& wf) : readFlags(rf), writtenFlags(wf)
00634   {
00635   }
00636   flagInfo() 
00637   {
00638   }
00639   
00640   vector<Dyninst::MachRegister> readFlags;
00641   vector<Dyninst::MachRegister> writtenFlags;
00642 };
00643 
00644 class ia32_instruction
00645 {
00646   friend unsigned int ia32_decode_operands (const ia32_prefixes& pref, 
00647                                             const ia32_entry& gotit, 
00648                                             const char* addr, 
00649                                             ia32_instruction& instruct);
00650   friend COMMON_EXPORT ia32_instruction& ia32_decode(unsigned int capa, const unsigned char* addr,
00651                        ia32_instruction& instruct);
00652   friend unsigned int ia32_decode_operands (const ia32_prefixes& pref, const ia32_entry& gotit, 
00653                                             const unsigned char* addr, ia32_instruction& instruct,
00654                                             ia32_memacc *mac = NULL);
00655   friend ia32_instruction& ia32_decode_FP(const ia32_prefixes& pref, const unsigned char* addr,
00656                                           ia32_instruction& instruct);
00657   friend unsigned int ia32_emulate_old_type(ia32_instruction& instruct);
00658   friend ia32_instruction& ia32_decode_FP(unsigned int opcode, 
00659                                           const ia32_prefixes& pref,
00660                                           const unsigned char* addr, 
00661                                           ia32_instruction& instruct,
00662                       ia32_entry * entry,
00663                                           ia32_memacc *mac = NULL);
00664 
00665   unsigned int   size;
00666   ia32_prefixes  prf;
00667   ia32_memacc    *mac;
00668   ia32_condition *cond;
00669   ia32_entry     *entry;
00670   ia32_locations *loc;
00671   unsigned int   legacy_type;
00672   bool           rip_relative_data;
00673 
00674 
00675  public:
00676   ia32_instruction(ia32_memacc* _mac = NULL, ia32_condition* _cnd = NULL,
00677                    ia32_locations *loc_ = NULL)
00678     : mac(_mac), cond(_cnd), entry(NULL), loc(loc_), rip_relative_data(false) {}
00679 
00680   ia32_entry * getEntry() { return entry; }
00681   unsigned int getSize() const { return size; }
00682   unsigned int getPrefixCount() const { return prf.getCount(); }
00683   ia32_prefixes * getPrefix() { return &prf; }
00684   unsigned int getLegacyType() const { return legacy_type; }
00685   bool hasRipRelativeData() const { return rip_relative_data; }
00686   const ia32_memacc& getMac(int which) const { return mac[which]; }
00687   const ia32_condition& getCond() const { return *cond; }
00688   const ia32_locations& getLocationInfo() const { return *loc; }
00689 
00690   COMMON_EXPORT static dyn_hash_map<entryID, flagInfo> const& getFlagTable();
00691   static void initFlagTable(dyn_hash_map<entryID, flagInfo>&);
00692   
00693 };
00694 
00695 // VG(02/07/2002): Information that the decoder can return is
00696 //   #defined below. The decoder always returns the size of the 
00697 //   instruction because that has to be determined anyway.
00698 //   Please don't add things that should be external to the
00699 //   decoder, e.g.: how many bytes a relocated instruction needs
00700 //   IMHO that stuff should go into inst-x86...
00701 
00702 #define IA32_DECODE_PREFIXES    (1<<0)
00703 #define IA32_DECODE_MNEMONICS   (1<<1)
00704 #define IA32_DECODE_OPERANDS    (1<<2)
00705 #define IA32_DECODE_JMPS    (1<<3)
00706 #define IA32_DECODE_MEMACCESS   (1<<4)
00707 #define IA32_DECODE_CONDITION   (1<<5)
00708 
00709 #define IA32_FULL_DECODER (IA32_DECODE_PREFIXES | IA32_DECODE_MNEMONICS | IA32_DECODE_OPERANDS | IA32_DECODE_JMPS | IA32_DECODE_MEMACCESS | IA32_DECODE_CONDITION)
00710 #define IA32_SIZE_DECODER 0
00711 
00712 COMMON_EXPORT ia32_instruction& ia32_decode(unsigned int capabilities, const unsigned char* addr, ia32_instruction&);
00713 
00714 
// Addressing modes distinguished for dynamic calls, numbered from 0.
enum dynamic_call_address_mode {
  REGISTER_DIRECT             = 0,
  REGISTER_INDIRECT           = 1,
  REGISTER_INDIRECT_DISPLACED = 2,
  SIB                         = 3,
  DISPLACED                   = 4,
  IP_INDIRECT_DISPLACED       = 5
};
00720 
00721 /*
00722    get_instruction: get the instruction that starts at instr.
00723    return the size of the instruction and set instType to a type descriptor
00724 */
00725 COMMON_EXPORT unsigned get_instruction(const unsigned char *instr, unsigned &instType,
00726              const unsigned char** op_ptr = NULL);
00727 
00728 /* get the target of a jump or call */
00729 COMMON_EXPORT Address get_target(const unsigned char *instr, unsigned type, unsigned size,
00730            Address addr);
00731 
00732 // Size of a jump rel32 instruction
00733 #define JUMP_REL32_SZ (6)
00734 // Maximum size of an emitted jump
00735 #define JUMP_SZ (5)
00736 // Size of a call rel32 instruction
00737 #define CALL_REL32_SZ (5)
00738 // >2gb displacement in 32 bit mode
00739 #define CALL_ABS32_SZ (11)
00740 #define JUMP_ABS32_SZ (6)
00741 // Max size of a relocated thunk call
00742 #define CALL_RELOC_THUNK (13)
00743 
00744 #if defined(arch_x86_64)
00745 // size of instruction sequence to get anywhere in address space
00746 // without touching any registers
00747 #define JUMP_ABS64_SZ (17)
00748 // Jump is push/return; call is push/push/return, so subtract a return
00749 #define CALL_ABS64_SZ (JUMP_ABS64_SZ+JUMP_ABS64_SZ-1)
00750 #endif
00751 
00752 #define PUSH_RM_OPC1 (0xFF)
00753 #define PUSH_RM_OPC2 (6)
00754 #define CALL_RM_OPC1 (0xFF)
00755 #define CALL_RM_OPC2 (2)
00756 #define JUMP_RM_OPC1 (0xFF)
00757 #define JUMP_RM_OPC2 (4)
00758 #define PUSH_EBP (0x50+REGNUM_EBP)
00759 #define SUB_REG_IMM32 (5)
00760 #define LEAVE (0xC9)
00761 
00762 #define EXTENDED_0x81_ADD 0
00763 #define EXTENDED_0x81_OR 1
00764 #define EXTENDED_0x81_ADDC 2
00765 #define EXTENDED_0x81_SHIFT 3
00766 #define EXTENDED_0x81_AND 4
00767 #define EXTENDED_0x81_SUB 5
00768 #define EXTENDED_0x81_XOR 6
00769 #define EXTENDED_0x81_CMP 7
00770 #define EXTENDED_0x83_AND 4
00771 
00772 unsigned int swapBytesIfNeeded(unsigned int i);
00773 
00774 class instruction {
00775  public:
00776     instruction(): type_(0), size_(0), ptr_(0), op_ptr_(0) {}
00777 
00778   instruction(const unsigned char *p, unsigned type, unsigned sz, const unsigned char* op = 0):
00779       type_(type), size_(sz), ptr_(p), op_ptr_(op ? op : p) {}
00780 
00781   instruction(const instruction &insn)
00782   {
00783     type_ = insn.type_;
00784     size_ = insn.size_;
00785     ptr_ = insn.ptr_;
00786     op_ptr_ = insn.op_ptr_;
00787   }
00788   
00789   COMMON_EXPORT instruction *copy() const;
00790 
00791   instruction(const void *ptr) :
00792       type_(0), size_(0), ptr_(NULL), op_ptr_(0) {
00793       setInstruction((const unsigned char*)ptr);
00794   }
00795 
00796   unsigned setInstruction(const unsigned char *p, Address = 0) {
00797       ptr_ = p;
00798       size_ = get_instruction(ptr_, type_, &op_ptr_);
00799       return size_;
00800   }
00801 
00802   // if the instruction is a jump or call, return the target, else return zero
00803   Address getTarget(Address addr) const { 
00804     return (Address)get_target(ptr_, type_, size_, addr); 
00805   }
00806 
00807   // return the size of the instruction in bytes
00808   unsigned size() const { return size_; }
00809 
00810   // And the size necessary to reproduce this instruction
00811   // at some random point.
00812   COMMON_EXPORT unsigned spaceToRelocate() const;
00813 
00814   // return the type of the instruction
00815   unsigned type() const { return type_; }
00816 
00817   // return a pointer to the instruction
00818   const unsigned char *ptr() const { 
00819       return ptr_; 
00820   }
00821 
00822   // return a pointer to the instruction's opcode
00823   const unsigned char* op_ptr() const { return op_ptr_; }
00824 
00825   // Function relocation...
00826   static unsigned maxInterFunctionJumpSize(unsigned addr_width) { return maxJumpSize(addr_width); }
00827 
00828   // And tell us how much space we'll need...
00829   COMMON_EXPORT static unsigned jumpSize(Address from, Address to, unsigned addr_width);
00830   COMMON_EXPORT static unsigned jumpSize(long disp, unsigned addr_width);
00831   COMMON_EXPORT static unsigned maxJumpSize(unsigned addr_width);
00832 
00833   COMMON_EXPORT bool getUsedRegs(pdvector<int> &regs);
00834   
00835   bool isCall() const { return type_ & IS_CALL; }
00836   bool isCallIndir() const { return (type_ & IS_CALL) && (type_ & INDIR); }
00837   bool isReturn() const { return (type_ & IS_RET) || (type_ & IS_RETF); }
00838   bool isRetFar() const { return type_ & IS_RETF; }
00839   bool isCleaningRet() const {return type_ & IS_RETC; }
00840   bool isJumpIndir() const { return (type_ & IS_JUMP) && (type_ & INDIR); }
00841   bool isJumpDir() const
00842     { return !(type_ & INDIR) && ((type_ & IS_JUMP) || (type_ & IS_JCC)); }
00843   bool isUncondJump() const
00844     { return ((type_ & IS_JUMP) && !(type_ & IS_JCC)); }
00845   bool isNop() const;
00846   bool isIndir() const { return type_ & INDIR; }
00847   bool isIllegal() const { return type_ & ILLEGAL; }
00848   bool isLeave() const { return *ptr_ == 0xC9; }  
00849   bool isPrivileged() const { return (type_ & PRVLGD); }
00850   bool isMoveRegMemToRegMem() const 
00851     { const unsigned char* p = op_ptr_ ? op_ptr_ : ptr_;
00852       return *p == MOV_R8_TO_RM8   || *p == MOV_R16_TO_RM16 ||
00853              *p == MOV_R32_TO_RM32 || *p ==  MOV_RM8_TO_R8  ||
00854              *p == MOV_RM16_TO_R16 || *p == MOV_RM32_TO_R32;   }
00855   bool isXORRegMemRegMem() const
00856       { const unsigned char* p = op_ptr_ ? op_ptr_ : ptr_;
00857         return *p == XOR_RM16_R16 || *p ==  XOR_RM32_R32 ||
00858                *p ==  XOR_R8_RM8  || *p ==  XOR_R16_RM16 ||
00859                *p == XOR_R32_RM32; }
00860   bool isANearBranch() const { return isJumpDir(); }
00861 
00862   bool isTrueCallInsn() const { return (isCall() && !isCallIndir()); }
00863   bool isSysCallInsn() const { return op_ptr_[0] == SYSCALL[0] &&
00864                                    op_ptr_[1] == SYSCALL[1]; }
00865 
00866   static bool isAligned(const Address ) { return true; }
00867 
00868   bool isCmp() const;
00869 
00870   void print()
00871   {
00872       for (unsigned i = 0; i < size_; i++)
00873       fprintf(stderr, " %02x", *(ptr_ + i));
00874       fprintf(stderr, "\n");
00875   }
00876           
00877   int getStackDelta();
00878 
00879  private:
00880   unsigned type_;   // type of the instruction (e.g. IS_CALL | INDIR)
00881   unsigned size_;   // size in bytes
00882   const unsigned char *ptr_;       // pointer to the instruction
00883   const unsigned char *op_ptr_;    // pointer to the opcode
00884 };
00885 
00886 /** Only appropriate for call/jump functions **/
00887 COMMON_EXPORT int get_disp(instruction *insn);
00888 int set_disp(bool setDisp, instruction *insn, int newOffset, bool outOfFunc);
00889 int displacement(const unsigned char *instr, unsigned type);
00890 
00891 /** Returns the immediate operand of an instruction **/
00892 Address get_immediate_operand(instruction *instr);
00893 
00894 COMMON_EXPORT int count_prefixes(unsigned insnType);
00895 
// True when disp fits in a signed 8-bit displacement, i.e. lies in
// [-128, 127] (MIN_IMM8 .. MAX_IMM8). The upper bound is inclusive; the
// earlier `disp < 127` test wrongly rejected 127.
inline bool is_disp8(long disp) {
   return (disp >= -128 && disp <= 127);
}
00899 
// True when disp fits in a signed 16-bit displacement, i.e. lies in
// [-32768, 32767] (MIN_IMM16 .. MAX_IMM16). The upper bound is inclusive;
// the earlier `disp < 32767` test wrongly rejected 32767.
inline bool is_disp16(long disp) {
   return (disp >= -32768 && disp <= 32767);
}
00903 
00904 inline bool is_disp32(long disp) {
00905   return (disp <= I32_MAX && disp >= I32_MIN);
00906 }
00907 inline bool is_disp32(Address a1, Address a2) {
00908   return is_disp32(a2 - (a1 + JUMP_REL32_SZ));
00909 }
00910 inline bool is_addr32(Address addr) {
00911     return (addr < UI32_MAX);
00912 }
00913 
00914 COMMON_EXPORT void decode_SIB(unsigned sib, unsigned& scale, Register& index_reg, Register& base_reg);
00915 COMMON_EXPORT const unsigned char* skip_headers(const unsigned char*, ia32_prefixes* = NULL);
00916 
00917 /* addresses on x86 don't have to be aligned */
00918 /* Address bounds of new dynamic heap segments.  On x86 we don't try
00919 to allocate new segments near base tramps, so heap segments can be
00920 allocated anywhere (the tramp address "x" is ignored). */
00921 inline Address region_lo(const Address /*x*/) { return 0x00000000; }
00922 inline Address region_hi(const Address /*x*/) { return 0xf0000000; }
00923 
00924 #if defined(arch_x86_64)
00925 // range functions for AMD64
00926 
00927 inline Address region_lo_64(const Address x) { return x & 0xffffffff80000000; }
00928 inline Address region_hi_64(const Address x) { return x | 0x000000007fffffff; }
00929 
00930 #endif
00931 
00932 COMMON_EXPORT bool insn_hasSIB(unsigned,unsigned&,unsigned&,unsigned&);
00933 COMMON_EXPORT bool insn_hasDisp8(unsigned ModRM);
00934 COMMON_EXPORT bool insn_hasDisp32(unsigned ModRM);
00935 
00936 COMMON_EXPORT bool isStackFramePrecheck_msvs( const unsigned char *buffer );
00937 COMMON_EXPORT bool isStackFramePrecheck_gcc( const unsigned char *buffer );
00938 
00939 }; // namespace NS_x86
00940 
00941 #endif
00942 
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 12 Jul 2013 for SymtabAPI by  doxygen 1.6.1