diff -Naur xnu-1504.9.26.orig/Makefile xnu-1504.9.26/Makefile --- xnu-1504.9.26.orig/Makefile 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/Makefile 2011-01-09 16:00:20.000000000 -0500 @@ -5,13 +5,13 @@ export SRCROOT=$(shell /bin/pwd) endif ifndef OBJROOT -export OBJROOT=$(SRCROOT)/BUILD/obj/ +export OBJROOT=$(SRCROOT)/BUILD/obj endif ifndef DSTROOT -export DSTROOT=$(SRCROOT)/BUILD/dst/ +export DSTROOT=$(SRCROOT)/BUILD/dst endif ifndef SYMROOT -export SYMROOT=$(SRCROOT)/BUILD/sym/ +export SYMROOT=$(SRCROOT)/BUILD/sym endif export MakeInc_cmd=${VERSDIR}/makedefs/MakeInc.cmd diff -Naur xnu-1504.9.26.orig/bsd/bsm/audit_kevents.h xnu-1504.9.26/bsd/bsm/audit_kevents.h --- xnu-1504.9.26.orig/bsd/bsm/audit_kevents.h 2011-01-06 11:45:25.000000000 -0500 +++ xnu-1504.9.26/bsd/bsm/audit_kevents.h 2011-01-09 16:00:20.000000000 -0500 @@ -667,6 +667,7 @@ /* * Possible desired future values based on review of BSD/Darwin system calls. */ +#define AUE_MAPTEXTSEGSYS AUE_NULL #define AUE_ATGETMSG AUE_NULL #define AUE_ATPUTMSG AUE_NULL #define AUE_ATSOCKET AUE_NULL diff -Naur xnu-1504.9.26.orig/bsd/conf/Makefile.x86_64 xnu-1504.9.26/bsd/conf/Makefile.x86_64 --- xnu-1504.9.26.orig/bsd/conf/Makefile.x86_64 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/bsd/conf/Makefile.x86_64 2011-01-09 16:00:20.000000000 -0500 @@ -36,7 +36,8 @@ drv_dep.o \ sdt_x86.o \ dtrace_isa.o \ - aes_modes.o + aes_modes.o \ + ubc_subr.o OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff -Naur xnu-1504.9.26.orig/bsd/conf/files xnu-1504.9.26/bsd/conf/files --- xnu-1504.9.26.orig/bsd/conf/files 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/bsd/conf/files 2011-01-09 16:00:20.000000000 -0500 @@ -547,6 +547,8 @@ bsd/kern/sysv_msg.c standard bsd/kern/mach_fat.c standard bsd/kern/mach_loader.c standard +bsd/kern/dyld.c standard +bsd/kern/disasm.c standard bsd/kern/posix_sem.c standard bsd/kern/posix_shm.c standard # XXXdbg - I need this in the journaling and block cache code diff -Naur 
xnu-1504.9.26.orig/bsd/kern/disasm.c xnu-1504.9.26/bsd/kern/disasm.c
--- xnu-1504.9.26.orig/bsd/kern/disasm.c	1969-12-31 19:00:00.000000000 -0500
+++ xnu-1504.9.26/bsd/kern/disasm.c	2011-01-09 16:00:20.000000000 -0500
@@ -0,0 +1,1505 @@
+/*
+ * instruction length decoder (written by kaitek, modified by mercurysquad)
+ * voodoo xnu kernel
+ *
+ * based on code from AntiHookExec 1.00, Copyright (c) 2004 Chew Keong TAN
+ * opcode tables based on documentation from http://www.sandpile.org/
+ *
+ * todo: * support for instruction set extensions newer than SSSE3
+ *       * verify that VT instructions are correctly decoded
+ */
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+
+#include "disasm.h"
+
+#define OP_HAS_MODRM		(1 << 0)
+#define OP_PREFIX		(1 << 1)
+#define OP_REX			(1 << 2)
+#define OP_TWOBYTE		(1 << 3)
+#define OP_THREEBYTE_38		(1 << 4)
+#define OP_THREEBYTE_3A		(1 << 5)
+#define OP_HAS_IMM8		(1 << 6)
+#define OP_HAS_IMM16		(1 << 7)
+#define OP_HAS_IMM32		(1 << 8)
+#define OP_HAS_IMM64		(1 << 9)
+#define OP_CHECK_66		(1 << 10)
+#define OP_CHECK_67		(1 << 11)
+#define OP_CHECK_REX		(1 << 12)
+#define OP_HAS_DISP8		(1 << 13)
+#define OP_HAS_DISP16		(1 << 14)
+#define OP_HAS_DISP32		(1 << 15)
+#define OP_UNDEFINED		(1 << 16)
+#define OP_IA32_ONLY		(1 << 17)
+#define OP_NEEDS_PATCH		(1 << 18)
+#define OP_SPECIAL		(1 << 19)
+
+#define OP_GROUP(n)		(((n) & 0xffu) << 24)	// pack a GRP_* index into flag bits 24..31
+#define OP_GROUP_MASK		(0xffu << 24)	// unsigned on purpose: 0xff << 24 overflows a signed int
+#define OP_GROUP_EXTRACT(n)	(((n) >> 24) & 0xff)	// recover the GRP_* index from a flags word
+
+#define OP_OPERANDS		(OP_HAS_MODRM|OP_HAS_IMM8|OP_HAS_IMM16|OP_HAS_IMM32| \
+OP_HAS_IMM64|OP_CHECK_66|OP_CHECK_67|OP_CHECK_REX| \
+OP_HAS_DISP8|OP_HAS_DISP16|OP_HAS_DISP32)
+
+#define PREF_NONE		(1 << 0)	// used for SSE opcodes with no prefix
+#define PREF_F0			(1 << 1)	// LOCK
+#define PREF_F2			(1 << 2)	// REPNE (or SSE)
+#define PREF_F3			(1 << 3)	// REP (or SSE)
+#define PREF_2E			(1 << 4)	// CS segment
+#define PREF_36			(1 << 5)	// SS segment
+#define PREF_3E			(1 << 6)	// DS segment
+#define PREF_26			(1 << 7)	// ES segment
+#define PREF_64			(1 << 8)	// FS segment
+#define PREF_65			(1 << 9)	// GS segment
+#define PREF_66			(1 << 10)	// operand size (or SSE)
+#define PREF_67			(1 << 11)	// address size
+#define PREF_REX		(1 << 12)	// REX byte (default operand size)
+#define PREF_REX_W		(1 << 13)	// REX byte (64-bit operand size)
+
+#define PREF_SSE_ALL		(PREF_NONE|PREF_F3|PREF_66|PREF_F2)
+
+#define min(x,y)		(((x) < (y)) ? (x) : (y))	// note: each argument is evaluated twice
+
+uint32_t prefix_table[256] =	// prefix byte -> PREF_* flag; bytes not listed stay 0
+{
+	[0xf0] = PREF_F0, [0xf2] = PREF_F2, [0xf3] = PREF_F3, [0x2e] = PREF_2E,
+	[0x36] = PREF_36, [0x3e] = PREF_3E, [0x26] = PREF_26, [0x64] = PREF_64,
+	[0x65] = PREF_65, [0x66] = PREF_66, [0x67] = PREF_67,
+
+	[0x40 ... 0x47] = PREF_REX,	// operand size unchanged
+	[0x48 ... 0x4f] = PREF_REX_W,	// 64-bit operand size
+};
+
+// note: some instructions (such as VT in groups 7 and 9), are distinguished not only by different
+// reg values but by different r/m values -- this can be safely ignored for the purposes of
+// length decoding.
+
+enum {
+	GRP_1 = 1, GRP_2, GRP_3A, GRP_3B,
+	GRP_4, GRP_5, GRP_6, GRP_7,
+	GRP_8, GRP_9, GRP_10, GRP_11,
+	GRP_12, GRP_13, GRP_14, GRP_15,
+	GRP_16, GRP_17A, GRP_17B,
+#ifdef EXTENDED_PATCHER
+	GRP_FISTTP
+#endif
+};
+
+uint32_t group_table[][8] =	// inherits from parent table
+{
+	[GRP_1] = {	// group 1 (80..83)
+		[0 ... 7] = OP_HAS_MODRM,	// ADD, OR, ADC, SBB, AND, SUB, XOR, CMP
+	},
+	[GRP_2] = {	// group 2 (C0..C1, D0..D3)
+		[0 ... 7] = OP_HAS_MODRM,	// ROL, ROR, RCL, RCR, SHL, SHR, SAL, SAR
+	},
+	[GRP_3A] = {	// group 3a (F6)
+		[0 ... 1] = OP_HAS_MODRM|OP_HAS_IMM8,	// TEST Eb,Ib (both encodings)
+		[2 ... 7] = OP_HAS_MODRM	// NOT, NEG, {MUL,IMUL,DIV,IDIV} AL/rAX
+	},
+	[GRP_3B] = {	// group 3b (F7)
+		[0 ... 1] = OP_HAS_MODRM|OP_CHECK_66,	// TEST Ev,Iz (both encodings)
+		[2 ... 7] = OP_HAS_MODRM	// NOT, NEG, {MUL,IMUL,DIV,IDIV} AL/rAX
+	},
+	[GRP_4] = {	// group 4 (FE)
+		[0 ... 1] = OP_HAS_MODRM,	// {INC,DEC} Eb
+		[2 ...
7] = OP_UNDEFINED
+	},
+	[GRP_5] = {	// group 5 (FF)
+		[0 ... 3] = OP_HAS_MODRM,	// {INC,DEC} Ev, CALL {Ev,Mp}
+#ifdef EXTENDED_PATCHER
+		[4 ... 5] = OP_HAS_MODRM|OP_SPECIAL,	// JMP {Ev,Mp}
+#else
+		[4 ... 5] = OP_HAS_MODRM,	// JMP {Ev,Mp}
+#endif
+		[6] = OP_HAS_MODRM,	// PUSH Ev
+		[7] = OP_UNDEFINED
+	},
+	[GRP_6] = {	// group 6 (0F 00)
+		[0 ... 5] = OP_HAS_MODRM|OP_SPECIAL,	// {SLDT,STR,LLDT,LTR,VERR,VERW} {Mw,Rv}
+		[6 ... 7] = OP_UNDEFINED
+	},
+	[GRP_7] = {	// group 7 (0F 01)
+		[0 ... 4] = OP_HAS_MODRM|OP_SPECIAL,	// {SGDT,SIDT,LGDT,LIDT} Ms, SMSW Mw
+		[5] = OP_UNDEFINED,
+		[6 ... 7] = OP_HAS_MODRM|OP_SPECIAL	// LMSW {Mw,Rv}, INVLPG M (also: SWAPGS/RDTSCP)
+	},
+	[GRP_8] = {	// group 8 (0F BA)
+		[0 ... 3] = OP_UNDEFINED,
+		[4 ... 7] = OP_HAS_MODRM|OP_HAS_IMM8	// BT, BTS, BTR, BTC
+	},
+	[GRP_9] = {	// group 9 (0F C7)
+		[0] = OP_UNDEFINED,
+		[1] = OP_HAS_MODRM,	// CMPXCHG8B Mq
+		[2 ... 5] = OP_UNDEFINED,
+		[6 ... 7] = OP_HAS_MODRM	// todo: VT instructions with prefixes
+	},
+	[GRP_10] = {	// group 10 (8F)
+		[0] = OP_HAS_MODRM,	// POP Ev
+		[1 ... 7] = OP_HAS_MODRM
+	},
+	[GRP_11] = {	// group 11 (0F B9)
+		[0 ... 7] = 0	// UD2
+	},
+	[GRP_12] = {	// group 12 (C6..C7)
+		[0] = OP_HAS_MODRM,	// MOV
+		[1 ... 7] = OP_HAS_MODRM
+	},
+	[GRP_13] = {	// group 13 (0F 71)
+		[0 ... 1] = OP_UNDEFINED,
+		[2] = OP_HAS_MODRM|OP_HAS_IMM8,	// PSRLW {PRq,VRo},Ib
+		[3] = OP_UNDEFINED,
+		[4] = OP_HAS_MODRM|OP_HAS_IMM8,	// PSRAW {PRq,VRo},Ib
+		[5] = OP_UNDEFINED,
+		[6] = OP_HAS_MODRM|OP_HAS_IMM8,	// PSLLW {PRq,VRo},Ib
+		[7] = OP_UNDEFINED
+	},
+	[GRP_14] = {	// group 14 (0F 72)
+		[0 ... 1] = OP_UNDEFINED,
+		[2] = OP_HAS_MODRM|OP_HAS_IMM8,	// PSRLD {PRq,VRo},Ib
+		[3] = OP_UNDEFINED,
+		[4] = OP_HAS_MODRM|OP_HAS_IMM8,	// PSRAD {PRq,VRo},Ib
+		[5] = OP_UNDEFINED,
+		[6] = OP_HAS_MODRM|OP_HAS_IMM8,	// PSLLD {PRq,VRo},Ib
+		[7] = OP_UNDEFINED
+	},
+	[GRP_15] = {	// group 15 (0F 73)
+		[0 ... 1] = OP_UNDEFINED,
+		[2 ... 3] = OP_HAS_MODRM|OP_HAS_IMM8,	// PSRLQ {PRq,VRo},Ib / PSRLDQ VRo,Ib
+		[4 ... 5] = OP_UNDEFINED,
+		[6 ... 7] = OP_HAS_MODRM|OP_HAS_IMM8	// PSLLQ {PRq,VRo},Ib / PSLLDQ VRo,Ib
+	},
+	[GRP_16] = {	// group 16 (0F AE) -- todo: test XRSTOR M and LFENCE (and CLFLUSH M and SFENCE)
+		[0 ... 7] = OP_HAS_MODRM,	// FX{SAVE,RSTOR} M512 / {LD,ST}MXCSR Md /
+		// X{SAVE,RSTOR} M or LFENCE / MFENCE / CLFLUSH M or SFENCE
+	},
+	[GRP_17A] = {	// group 17a (0F 18)
+		[0 ... 3] = OP_HAS_MODRM,	// PREFETCH{NTA,T0,T1,T2} M
+		[4 ... 7] = OP_HAS_MODRM	// HINT_NOP Ev
+	},
+	[GRP_17B] = {	// group 17b (0F 19..1F)
+		[0 ... 7] = OP_HAS_MODRM	// HINT_NOP Ev
+	},
+#ifdef EXTENDED_PATCHER
+	[GRP_FISTTP] = {	// (DF, DB, DD)
+		[0] = OP_HAS_MODRM,
+		[1] = OP_HAS_MODRM|OP_NEEDS_PATCH,	// FISTTP
+		[2 ... 7] = OP_HAS_MODRM
+	}
+#endif
+};
+
+uint32_t one_byte_table[256] =	// OP_* flags per one-byte opcode; entries are positional, index = opcode byte
+{
+	OP_HAS_MODRM|OP_SPECIAL,	// 00: ADD Eb,Gb
+	OP_HAS_MODRM,	// 01: ADD Ev,Gv
+	OP_HAS_MODRM,	// 02: ADD Gb,Eb
+	OP_HAS_MODRM,	// 03: ADD Gv,Ev
+	OP_HAS_IMM8,	// 04: ADD AL,Ib
+	OP_CHECK_66,	// 05: ADD rAX,Iz
+	OP_IA32_ONLY|OP_SPECIAL,	// 06: PUSH ES
+	OP_IA32_ONLY|OP_SPECIAL,	// 07: POP ES
+
+	OP_HAS_MODRM,	// 08: OR Eb,Gb
+	OP_HAS_MODRM,	// 09: OR Ev,Gv
+	OP_HAS_MODRM,	// 0A: OR Gb,Eb
+	OP_HAS_MODRM,	// 0B: OR Gv,Ev
+	OP_HAS_IMM8,	// 0C: OR AL,Ib
+	OP_CHECK_66,	// 0D: OR rAX,Iz
+	OP_IA32_ONLY|OP_SPECIAL,	// 0E: PUSH CS
+	OP_TWOBYTE,	// 0F: 2-byte escape
+
+	OP_HAS_MODRM,	// 10: ADC Eb,Gb
+	OP_HAS_MODRM,	// 11: ADC Ev,Gv
+	OP_HAS_MODRM,	// 12: ADC Gb,Eb
+	OP_HAS_MODRM,	// 13: ADC Gv,Ev
+	OP_HAS_IMM8,	// 14: ADC AL,Ib
+	OP_CHECK_66,	// 15: ADC rAX,Iz
+	OP_IA32_ONLY|OP_SPECIAL,	// 16: PUSH SS
+	OP_IA32_ONLY|OP_SPECIAL,	// 17: POP SS
+
+	OP_HAS_MODRM,	// 18: SBB Eb,Gb
+	OP_HAS_MODRM,	// 19: SBB Ev,Gv
+	OP_HAS_MODRM,	// 1A: SBB Gb,Eb
+	OP_HAS_MODRM,	// 1B: SBB Gv,Ev
+	OP_HAS_IMM8,	// 1C: SBB AL,Ib
+	OP_CHECK_66,	// 1D: SBB rAX,Iz
+	OP_IA32_ONLY|OP_SPECIAL,	// 1E: PUSH DS
+	OP_IA32_ONLY|OP_SPECIAL,	// 1F: POP DS
+
+	OP_HAS_MODRM,	// 20: AND Eb,Gb
+	OP_HAS_MODRM,	// 21: AND Ev,Gv
+	OP_HAS_MODRM,	// 22: AND Gb,Eb
+	OP_HAS_MODRM,	// 23: AND Gv,Ev
+	OP_HAS_IMM8,	// 24: AND AL,Ib
+	OP_CHECK_66,	// 25: AND rAX,Iz
+	OP_PREFIX,	// 26: ES prefix
+	OP_IA32_ONLY|OP_SPECIAL,	// 27: DAA
+
+	OP_HAS_MODRM,	// 28: SUB Eb,Gb
+	OP_HAS_MODRM,	// 29: SUB Ev,Gv
+	OP_HAS_MODRM,	// 2A: SUB Gb,Eb
+	OP_HAS_MODRM,	// 2B: SUB Gv,Ev
+	OP_HAS_IMM8,	// 2C: SUB AL,Ib
+	OP_CHECK_66,	// 2D: SUB rAX,Iz
+	OP_PREFIX,	// 2E: CS prefix (hint not taken for Jcc)
+	OP_IA32_ONLY|OP_SPECIAL,	// 2F: DAS
+
+	OP_HAS_MODRM,	// 30: XOR Eb,Gb
+	OP_HAS_MODRM,	// 31: XOR Ev,Gv
+	OP_HAS_MODRM,	// 32: XOR Gb,Eb
+	OP_HAS_MODRM,	// 33: XOR Gv,Ev
+	OP_HAS_IMM8,	// 34: XOR AL,Ib
+	OP_CHECK_66,	// 35: XOR rAX,Iz
+	OP_PREFIX,	// 36: SS prefix
+	OP_IA32_ONLY|OP_SPECIAL,	// 37: AAA
+
+	OP_HAS_MODRM,	// 38: CMP Eb,Gb
+	OP_HAS_MODRM,	// 39: CMP Ev,Gv
+	OP_HAS_MODRM,	// 3A: CMP Gb,Eb
+	OP_HAS_MODRM,	// 3B: CMP Gv,Ev
+	OP_HAS_IMM8,	// 3C: CMP AL,Ib
+	OP_CHECK_66,	// 3D: CMP rAX,Iz
+	OP_PREFIX,	// 3E: DS prefix (hint taken for Jcc)
+	OP_IA32_ONLY|OP_SPECIAL,	// 3F: AAS
+
+	/* note: the single-byte opcode forms of the INC/DEC instructions do not exist
+	 * in the x86-64 instruction set, but rather are reassigned for use as the REX
+	 * prefix. for the purposes of length decoding, we only need to check whether
+	 * the fourth bit in the REX byte is set, which is the case for 48 to 4F. */
+
+	OP_REX,	// 40: INC eAX
+	OP_REX,	// 41: INC eCX
+	OP_REX,	// 42: INC eDX
+	OP_REX,	// 43: INC eBX
+	OP_REX,	// 44: INC eSP
+	OP_REX,	// 45: INC eBP
+	OP_REX,	// 46: INC eSI
+	OP_REX,	// 47: INC eDI
+
+	OP_REX,	// 48: DEC eAX
+	OP_REX,	// 49: DEC eCX
+	OP_REX,	// 4A: DEC eDX
+	OP_REX,	// 4B: DEC eBX
+	OP_REX,	// 4C: DEC eSP
+	OP_REX,	// 4D: DEC eBP
+	OP_REX,	// 4E: DEC eSI
+	OP_REX,	// 4F: DEC eDI
+
+	0,	// 50: PUSH rAX
+	0,	// 51: PUSH rCX
+	0,	// 52: PUSH rDX
+	0,	// 53: PUSH rBX
+	0,	// 54: PUSH rSP
+	0,	// 55: PUSH rBP
+	0,	// 56: PUSH rSI
+	0,	// 57: PUSH rDI
+
+	0,	// 58: POP rAX
+	0,	// 59: POP rCX
+	0,	// 5A: POP rDX
+	0,	// 5B: POP rBX
+	0,	// 5C: POP rSP
+	0,	// 5D: POP rBP
+	0,	// 5E: POP rSI
+	0,	// 5F: POP rDI
+
+	OP_IA32_ONLY,	// 60: PUSH{A,AD}
+	OP_IA32_ONLY,	// 61: POP{A,AD}
+	OP_IA32_ONLY|OP_HAS_MODRM,	// 62: BOUND Gv,Ma
+	OP_HAS_MODRM|OP_SPECIAL,	// 63: ARPL Ew,Gw (MOVSXD Gv,Ed for x86-64)
+	OP_PREFIX,	// 64: FS prefix
+	OP_PREFIX,	// 65: GS prefix (hint alt taken for Jcc)
+	OP_PREFIX,	// 66: operand size prefix
+	OP_PREFIX,	// 67: address size prefix
+
+	OP_CHECK_66,	// 68: PUSH Iz
+	OP_HAS_MODRM|OP_CHECK_66,	// 69: IMUL Gv,Ev,Iz
+	OP_HAS_IMM8,	// 6A: PUSH Ib
+	OP_HAS_MODRM|OP_HAS_IMM8,	// 6B: IMUL Gv,Ev,Ib
+	0,	// 6C: IN{S,SB} Yb,DX
+	0,	// 6D: IN{SW,SD} Yz,DX
+	0,	// 6E: OUT{S,SB} DX,Xb
+	0,	// 6F: OUT{S,SW,SD} DX,Xz
+
+	OP_HAS_IMM8,	// 70: JO Jb
+	OP_HAS_IMM8,	// 71: JNO Jb
+	OP_HAS_IMM8,	// 72: J{B,NAE,C} Jb
+	OP_HAS_IMM8,	// 73: J{NB,AE,NC} Jb
+	OP_HAS_IMM8,	// 74: J{Z,E} Jb
+	OP_HAS_IMM8,	// 75: J{NZ,NE} Jb
+	OP_HAS_IMM8,	// 76: J{BE,NA} Jb
+	OP_HAS_IMM8,	// 77: J{NBE,A} Jb
+
+	OP_HAS_IMM8,	// 78: JS Jb
+	OP_HAS_IMM8,	// 79: JNS Jb
+	OP_HAS_IMM8,	// 7A: J{P,PE} Jb
+	OP_HAS_IMM8,	// 7B: J{NP,PO} Jb
+	OP_HAS_IMM8,	// 7C: J{L,NGE} Jb
+	OP_HAS_IMM8,	// 7D: J{NL,GE} Jb
+	OP_HAS_IMM8,	// 7E: J{LE,NG} Jb
+	OP_HAS_IMM8,	// 7F: J{NLE,G} Jb
+
+	OP_GROUP(GRP_1)|OP_HAS_IMM8,	// 80: group 1 (Eb,Ib)
+	OP_GROUP(GRP_1)|OP_CHECK_66,	// 81: group 1 (Ev,Iz)
+	OP_IA32_ONLY|OP_GROUP(GRP_1)|OP_HAS_IMM8,	// 82: group 1 (Eb,Ib) [alias]
+	OP_GROUP(GRP_1)|OP_HAS_IMM8,	// 83: group 1 (Ev,Ib)
+	OP_HAS_MODRM,	// 84: TEST Eb,Gb
+	OP_HAS_MODRM,	// 85: TEST Ev,Gv
+	OP_HAS_MODRM,	// 86: XCHG Eb,Gb
+	OP_HAS_MODRM,	// 87: XCHG Ev,Gv
+
+	OP_HAS_MODRM,	// 88: MOV Eb,Gb
+	OP_HAS_MODRM,	// 89: MOV Ev,Gv
+	OP_HAS_MODRM,	// 8A: MOV Gb,Eb
+	OP_HAS_MODRM,	// 8B: MOV Gv,Ev
+	OP_HAS_MODRM,	// 8C: MOV {Mw,Rv},Sw
+	OP_HAS_MODRM,	// 8D: LEA Gv,M
+	OP_HAS_MODRM,	// 8E: MOV Sw,{Mw,Rv}
+	OP_GROUP(GRP_10),	// 8F: group 10
+
+	OP_SPECIAL,	// 90: NOP / PAUSE (with F3 prefix)
+	0,	// 91: XCHG rCX,rAX
+	0,	// 92: XCHG rDX,rAX
+	0,	// 93: XCHG rBX,rAX
+	0,	// 94: XCHG rSP,rAX
+	0,	// 95: XCHG rBP,rAX
+	0,	// 96: XCHG rSI,rAX
+	0,	// 97: XCHG rDI,rAX
+
+	0,	// 98: C{BW,WDE}
+	0,	// 99: C{WD,DQ}
+	OP_IA32_ONLY|OP_CHECK_66|OP_HAS_IMM16,	// 9A: CALL Ap
+	0,	// 9B: {,F}WAIT
+	0,	// 9C: PUSH{F,FD} Fv
+	0,	// 9D: POP{F,FD} Fv
+	0,	// 9E: SAHF
+	0,	// 9F: LAHF
+
+	OP_CHECK_67,	// A0: MOV AL,Ob
+	OP_CHECK_67,	// A1: MOV rAX,Ov
+	OP_CHECK_67,	// A2: MOV Ob,AL
+	OP_CHECK_67,	// A3: MOV Ov,rAX
+	0,	// A4: MOV{S,SB} Yb,Xb
+	0,	// A5: MOV{S,SW,SD} Yv,Xv
+	0,	// A6: CMP{S,SB} Yb,Xb
+	0,	// A7: CMP{S,SW,SD} Yv,Xv
+
+	OP_HAS_IMM8,	// A8: TEST AL,Ib
+	OP_CHECK_66,	// A9: TEST rAX,Iz
+	0,	// AA: STO{S,SB} Yb,AL
+	0,	// AB: STO{S,SW,SD} Yv,rAX
+	0,	// AC: LOD{S,SB} AL,Xb
+	0,	// AD: LOD{S,SW,SD} rAX,Xv
+	0,	// AE: SCA{S,SB} Yb,AL
+	0,	// AF: SCA{S,SW,SD} Yv,rAX
+
+	OP_HAS_IMM8,	// B0: MOV AL,Ib
+	OP_HAS_IMM8,	// B1: MOV CL,Ib
+	OP_HAS_IMM8,	// B2: MOV DL,Ib
+	OP_HAS_IMM8,	// B3: MOV BL,Ib
+	OP_HAS_IMM8,	// B4: MOV AH,Ib
+	OP_HAS_IMM8,	// B5: MOV CH,Ib
+	OP_HAS_IMM8,	// B6: MOV DH,Ib
+	OP_HAS_IMM8,	// B7: MOV BH,Ib
+
+	OP_CHECK_66|OP_CHECK_REX,	// B8: MOV rAX,Iv
+	OP_CHECK_66|OP_CHECK_REX,	// B9: MOV rCX,Iv
+	OP_CHECK_66|OP_CHECK_REX,	// BA: MOV rDX,Iv
+	OP_CHECK_66|OP_CHECK_REX,	// BB: MOV rBX,Iv
+	OP_CHECK_66|OP_CHECK_REX,	// BC: MOV rSP,Iv
+	OP_CHECK_66|OP_CHECK_REX,	// BD: MOV rBP,Iv
+	OP_CHECK_66|OP_CHECK_REX,	// BE: MOV rSI,Iv
+	OP_CHECK_66|OP_CHECK_REX,	// BF: MOV rDI,Iv
+
+	OP_GROUP(GRP_2)|OP_HAS_IMM8,	// C0: group 2 (Eb,Ib)
+	OP_GROUP(GRP_2)|OP_HAS_IMM8,	// C1: group 2 (Ev,Ib)
+	OP_HAS_IMM16,	// C2: RETN Iw
+	0,	// C3: RETN
+	OP_IA32_ONLY|OP_HAS_MODRM|OP_SPECIAL,	// C4: LES Gz,Mp
+	OP_IA32_ONLY|OP_HAS_MODRM|OP_SPECIAL,	// C5: LDS Gz,Mp
+	OP_GROUP(GRP_12)|OP_HAS_IMM8,	// C6: group 12 (Eb,Ib)
+	OP_GROUP(GRP_12)|OP_CHECK_66,	// C7: group 12 (Ev,Iz)
+
+	OP_HAS_IMM16|OP_HAS_IMM8,	// C8: ENTER Iw,Ib
+	0,	// C9: LEAVE
+	OP_HAS_IMM16,	// CA: RETF Iw
+	0,	// CB: RETF
+	0,	// CC: INT3
+	OP_HAS_IMM8,	// CD: INT Ib
+	OP_IA32_ONLY,	// CE: INTO
+	OP_SPECIAL,	// CF: IRET
+
+	OP_GROUP(GRP_2),	// D0: group 2 (Eb,1)
+	OP_GROUP(GRP_2),	// D1: group 2 (Ev,1)
+	OP_GROUP(GRP_2),	// D2: group 2 (Eb,CL)
+	OP_GROUP(GRP_2),	// D3: group 2 (Ev,CL)
+	OP_IA32_ONLY|OP_HAS_IMM8|OP_SPECIAL,	// D4: AAM Ib
+	OP_IA32_ONLY|OP_HAS_IMM8|OP_SPECIAL,	// D5: AAD Ib
+	OP_IA32_ONLY,	// D6: SALC
+	0,	// D7: XLAT{,B}
+
+#ifdef EXTENDED_PATCHER
+	OP_HAS_MODRM,	// D8: ESC to coprocessor
+	OP_HAS_MODRM,	// D9: ESC to coprocessor
+	OP_HAS_MODRM,	// DA: ESC to coprocessor
+	OP_GROUP(GRP_FISTTP),	// DB: ESC to coprocessor
+	OP_HAS_MODRM,	// DC: ESC to coprocessor
+	OP_GROUP(GRP_FISTTP),	// DD: ESC to coprocessor
+	OP_HAS_MODRM,	// DE: ESC to coprocessor
+	OP_GROUP(GRP_FISTTP),	// DF: ESC to coprocessor
+#else
+	OP_HAS_MODRM,	// D8: ESC to coprocessor
+	OP_HAS_MODRM,	// D9: ESC to coprocessor
+	OP_HAS_MODRM,	// DA: ESC to coprocessor
+	OP_HAS_MODRM,	// DB: ESC to coprocessor
+	OP_HAS_MODRM,	// DC: ESC to coprocessor
+	OP_HAS_MODRM,	// DD: ESC to coprocessor
+	OP_HAS_MODRM,	// DE: ESC to coprocessor
+	OP_HAS_MODRM,	// DF: ESC to coprocessor
+#endif
+
+	OP_HAS_IMM8,	// E0: LOOP{NE,NZ} Jb
+	OP_HAS_IMM8,	// E1: LOOP{E,Z} Jb
+	OP_HAS_IMM8,	// E2: LOOP Jb
+	OP_HAS_IMM8,	// E3: J{CXZ,ECXZ} Jb
+	OP_HAS_IMM8,	// E4: IN AL,Ib
+	OP_HAS_IMM8,	// E5: IN eAX,Ib
+	OP_HAS_IMM8,	// E6: OUT Ib,AL
+	OP_HAS_IMM8,	// E7: OUT Ib,eAX
+
+	OP_CHECK_66,	// E8: CALL Jz
+	OP_CHECK_66,	// E9: JMP Jz
+#ifdef EXTENDED_PATCHER
+	OP_IA32_ONLY|OP_CHECK_66|OP_HAS_IMM16|OP_SPECIAL,	// EA: JMP Ap
+#else
+	OP_IA32_ONLY|OP_CHECK_66|OP_HAS_IMM16,	// EA: JMP Ap
+#endif
+	OP_HAS_IMM8,	// EB: JMP Jb
+	0,	// EC: IN AL,DX
+	0,	// ED: IN eAX,DX
+	0,	// EE: OUT DX,AL
+	0,	// EF: OUT DX,eAX
+
+	OP_PREFIX,	// F0: LOCK
+	0,	// F1: INT1
+	OP_PREFIX,	// F2: REPNE
+	OP_PREFIX,	// F3: REP{,E}
+	0,	// F4: HLT
+	0,	// F5: CMC
+	OP_GROUP(GRP_3A),	// F6: group 3 (Eb)
+	OP_GROUP(GRP_3B),	// F7: group 3 (Ev)
+
+	0,	// F8: CLC
+	0,	// F9: STC
+	0,	// FA: CLI
+	0,	// FB: STI
+	0,	// FC: CLD
+	0,	// FD: STD
+	OP_GROUP(GRP_4),	// FE: group 4
+	OP_GROUP(GRP_5)	// FF: group 5
+};
+
+typedef struct {	// one entry of the 0F-escaped opcode table
+	uint32_t flags;	// OP_* flags, same encoding as one_byte_table entries
+	uint32_t prefixes;	// PREF_* mask; presumably the prefix forms the opcode is defined for -- confirm against the decoder
+} ext_opcode_t;
+
+ext_opcode_t two_byte_table[256] = {	// entries are positional: index = byte following the 0F escape
+	{ OP_GROUP(GRP_6), 0 },	// 00: group 6
+	{ OP_GROUP(GRP_7), 0 },	// 01: group 7
+	{ OP_HAS_MODRM|OP_SPECIAL, 0 },	// 02: LAR Gv,Ew
+	{ OP_HAS_MODRM|OP_SPECIAL, 0 },	// 03: LSL Gv,Ew
+	{ OP_UNDEFINED, 0 },	// 04
+	{ 0, 0 },	// 05: SYSCALL
+	{ OP_SPECIAL, 0 },	// 06: CLTS
+	{ OP_SPECIAL, 0 },	// 07: SYSRET
+
+	{ OP_SPECIAL, 0 },	// 08: INVD
+	{ OP_SPECIAL, 0 },	// 09: WBINVD
+	{ OP_UNDEFINED, 0 },	// 0A
+	{ 0, 0 },	// 0B: UD2
+	{ OP_UNDEFINED, 0 },	// 0C
+	{ OP_HAS_MODRM, 0 },	// 0D: PREFETCHx M
+	{ 0, 0 },	// 0E: FEMMS
+	{ OP_UNDEFINED, 0 },	// 0F (3DNow!)
+ + { OP_HAS_MODRM, PREF_SSE_ALL }, // 10: MOV{UP,S}S V{o,d},W{o,d} / MOV{UP,S}D V{o,q},W{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 11: MOV{UP,S}S W{o,d},V{o,d} / MOV{UP,S}D W{o,q},V{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 12: MOV{L,HL}PS Vq,{M,VR}q / MOVSLDUP Vo,Wo / MOVLPD Vq,Mq / MOVDDUP Vo,Wq + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 13: MOVLP{S,D} Mq,Vq + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 14: UNPCKLP{S,D} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 15: UNPCKHP{S,D} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_F3|PREF_66 }, // 16: MOV{H,LH}PS Vq,{M,VR}q / MOVSHDUP Vo,Wo / MOVHPD Vq,Mq + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 17: MOVHP{S,D} Mq,Vq + + { OP_GROUP(GRP_17A), 0 }, // 18: group 17 (PREFETCH{NTA,T0,T1,T2} and HINT_NOP) + { OP_GROUP(GRP_17B), 0 }, // 19: group 17 (HINT_NOP) + { OP_GROUP(GRP_17B), 0 }, // 1A: group 17 (HINT_NOP) + { OP_GROUP(GRP_17B), 0 }, // 1B: group 17 (HINT_NOP) + { OP_GROUP(GRP_17B), 0 }, // 1C: group 17 (HINT_NOP) + { OP_GROUP(GRP_17B), 0 }, // 1D: group 17 (HINT_NOP) + { OP_GROUP(GRP_17B), 0 }, // 1E: group 17 (HINT_NOP) + { OP_GROUP(GRP_17B), 0 }, // 1F: group 17 (HINT_NOP) + + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // 20: MOV Rd,Cd + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // 21: MOV Rd,Dd + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // 22: MOV Cd,Rd + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // 23: MOV Dd,Rd + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // 24: MOV Rd,Td + { OP_UNDEFINED, 0 }, // 25 + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // 26: MOV Td,Rd + { OP_UNDEFINED, 0 }, // 27 + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 28: MOVAP{S,D} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 29: MOVAP{S,D} Wo,Vo + { OP_HAS_MODRM, PREF_SSE_ALL }, // 2A: CVTPI2PS Vq,{M,P}q / CVTSI2SS Vd,Ed / CVTPI2PD Vo,{M,P}q / CVTSI2SD Vq,Ed + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 2B: MOVNTP{S,D} Mo,Vo + { OP_HAS_MODRM, PREF_SSE_ALL }, // 2C: CVTT{PS2PI,SS2SI} {Pq,Gd},W{q,d} / CVTT{PD2PI,SD2SI} {Pq,Gd},W{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 2D: CVT{PS2PI,SS2SI} {Pq,Gd},W{q,d} / 
CVT{PD2PI,SD2SI} {Pq,Gd},W{o,q} + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 2E: UCOMIS{S,D} V{d,q},W{d,q} + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 2F: COMIS{S,D} V{d,q},W{d,q} + + { OP_SPECIAL, 0 }, // 30: WRMSR + { 0, 0 }, // 31: RDTSC + { OP_SPECIAL, 0 }, // 32: RDMSR + { 0, 0 }, // 33: RDPMC + { OP_NEEDS_PATCH, 0 }, // 34: SYSENTER + { OP_SPECIAL, 0 }, // 35: SYSEXIT + { OP_UNDEFINED, 0 }, // 36 + { OP_UNDEFINED, 0 }, // 37 + + { OP_THREEBYTE_38, 0 }, // 38: three-byte opcode + { OP_UNDEFINED, 0 }, // 39 + { OP_THREEBYTE_3A, 0 }, // 3A: three-byte opcode + { OP_UNDEFINED, 0 }, // 3B + { OP_UNDEFINED, 0 }, // 3C + { OP_UNDEFINED, 0 }, // 3D + { OP_UNDEFINED, 0 }, // 3E + { OP_UNDEFINED, 0 }, // 3F + + { OP_HAS_MODRM, 0 }, // 40: CMOVO Gv,Ev + { OP_HAS_MODRM, 0 }, // 41: CMOVNO Gv,Ev + { OP_HAS_MODRM, 0 }, // 42: CMOV{B,C,NAE} Gv,Ev + { OP_HAS_MODRM, 0 }, // 43: CMOV{AE,NB,NC} Gv,Ev + { OP_HAS_MODRM, 0 }, // 44: CMOV{E,Z} Gv,Ev + { OP_HAS_MODRM, 0 }, // 45: CMOV{NE,NZ} Gv,Ev + { OP_HAS_MODRM, 0 }, // 46: CMOV{BE,NA} Gv,Ev + { OP_HAS_MODRM, 0 }, // 47: CMOV{A,NBE} Gv,Ev + + { OP_HAS_MODRM, 0 }, // 48: CMOVS Gv,Ev + { OP_HAS_MODRM, 0 }, // 49: CMOVNS Gv,Ev + { OP_HAS_MODRM, 0 }, // 4A: CMOV{P,PE} Gv,Ev + { OP_HAS_MODRM, 0 }, // 4B: CMOV{NP,PO} Gv,Ev + { OP_HAS_MODRM, 0 }, // 4C: CMOV{L,NGE} Gv,Ev + { OP_HAS_MODRM, 0 }, // 4D: CMOV{NL,GE} Gv,Ev + { OP_HAS_MODRM, 0 }, // 4E: CMOV{LE,NG} Gv,Ev + { OP_HAS_MODRM, 0 }, // 4F: CMOV{NLE,G} Gv,Ev + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 50: MOVMSKP{S,D} Gd,VRo + { OP_HAS_MODRM, PREF_SSE_ALL }, // 51: SQRT{P,S}S V{o,d},W{o,d} / SQRT{P,S}D V{o,q},W{o,q} + { OP_HAS_MODRM, PREF_NONE|PREF_F3 }, // 52: RSQRT{P,S}S V{o,d},W{o,d} + { OP_HAS_MODRM, PREF_NONE|PREF_F3 }, // 53: RCP{P,S}S V{o,d},W{o,d} + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 54: ANDP{S,D} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 55: ANDNP{S,D} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 56: ORP{S,D} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 57: 
XORP{S,D} Vo,Wo + + { OP_HAS_MODRM, PREF_SSE_ALL }, // 58: ADD{P,S}S V{o,d},W{o,d} / ADD{P,S}D V{o,q},W{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 59: MUL{P,S}S V{o,d},W{o,d} / MUL{P,S}D V{o,q},W{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 5A: CVTPS2PD Vo,Wq / CVTSS2SD Vq,Wd / CVTPD2PS Vo,Wo / CVTSD2SS Vd,Wq + { OP_HAS_MODRM, PREF_NONE|PREF_F3|PREF_66 }, // 5B: CVT{DQ2PS,TPS2DQ,PS2DQ} Vo,Wo + { OP_HAS_MODRM, PREF_SSE_ALL }, // 5C: SUB{P,S}S V{o,d},W{o,d} / SUB{P,S}D V{o,q},W{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 5D: MIN{P,S}S V{o,d},W{o,d} / MIN{P,S}D V{o,q},W{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 5E: DIV{P,S}S V{o,d},W{o,d} / DIV{P,S}D V{o,q},W{o,q} + { OP_HAS_MODRM, PREF_SSE_ALL }, // 5F: MAX{P,S}S V{o,d},W{o,d} / MAX{P,S}D V{o,q},W{o,q} + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 60: PUNPCKLBW Pq,Qd / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 61: PUNPCKLWD Pq,Qd / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 62: PUNPCKLDQ Pq,Qd / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 63: PACKSSWB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 64: PCMPGTB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 65: PCMPGTW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 66: PCMPGTD Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 67: PACKUSWB Pq,Qq / Vo,Wo + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 68: PUNPCKHBW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 69: PUNPCKHWD Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 6A: PUNPCKHDQ Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 6B: PACKSSDW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_66 }, // 6C: PUNPCKLQDQ Vo,Wq + { OP_HAS_MODRM, PREF_66 }, // 6D: PUNPCKHQDQ Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 6E: MOVD Pq,Ed / Vo,Ed + { OP_HAS_MODRM, PREF_NONE|PREF_F3|PREF_66 }, // 6F: MOVQ Pq,Qq / MOV{DQU,DQA} Vo,Wo + + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_SSE_ALL }, // 70: PSHUFW Pq,Qq,Ib / PSHUF{HW,D,LW} Vo,Wo,Ib + { OP_GROUP(GRP_13), PREF_NONE|PREF_66 }, // 71: group 
13 (PSHIMW) PS{RL,RA,LL}W {PRq,VRo},Ib + { OP_GROUP(GRP_14), PREF_NONE|PREF_66 }, // 72: group 14 (PSHIMD) PS{RL,RA,LL}D {PRq,VRo},Ib + { OP_GROUP(GRP_15), PREF_NONE|PREF_66 }, // 73: group 15 (PSHIMQ) PS{RL,LL}Q {PRq,VRo},Ib / PSRLDQ VRo,Ib + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 74: PCMPEQB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 75: PCMPEQW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 76: PCMPEQD Pq,Qq / Vo,Wo + { 0, PREF_NONE }, // 77: EMMS + + { OP_HAS_MODRM, 0 }, // 78: VMREAD E{d,q},G{d,q} + { OP_HAS_MODRM, 0 }, // 79: VMWRITE E{d,q},G{d,q} + { OP_UNDEFINED, 0 }, // 7A + { OP_UNDEFINED, 0 }, // 7B + { OP_HAS_MODRM, PREF_NONE|PREF_66|PREF_F2 }, // 7C: HADDP{D,S} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66|PREF_F2 }, // 7D: HSUBP{D,S} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_F3|PREF_66 }, // 7E: MOVD Ed,Pd / MOVQ V{o,q},{M,V}q / MOVD Ed,Vd + { OP_HAS_MODRM, PREF_NONE|PREF_F3|PREF_66 }, // 7F: MOVQ Qq,Pq / MOV{DQU,DQA} Wo,Vo + + { OP_CHECK_66, 0 }, // 80: JO Jv + { OP_CHECK_66, 0 }, // 81: JNO Jv + { OP_CHECK_66, 0 }, // 82: J{B,C,NAE} Jv + { OP_CHECK_66, 0 }, // 83: J{AE,NB,NC} Jv + { OP_CHECK_66, 0 }, // 84: J{E,Z} Jv + { OP_CHECK_66, 0 }, // 85: J{NE,NZ} Jv + { OP_CHECK_66, 0 }, // 86: J{BE,NA} Jv + { OP_CHECK_66, 0 }, // 87: J{A,NBE} Jv + + { OP_CHECK_66, 0 }, // 88: JS Jv + { OP_CHECK_66, 0 }, // 89: JNS Jv + { OP_CHECK_66, 0 }, // 8A: J{P,PE} Jv + { OP_CHECK_66, 0 }, // 8B: J{NP,PO} Jv + { OP_CHECK_66, 0 }, // 8C: J{L,NGE} Jv + { OP_CHECK_66, 0 }, // 8D: J{NL,GE} Jv + { OP_CHECK_66, 0 }, // 8E: J{LE,NG} Jv + { OP_CHECK_66, 0 }, // 8F: J{NLE,G} Jv + + { OP_HAS_MODRM, 0 }, // 90: SETO Eb + { OP_HAS_MODRM, 0 }, // 91: SETNO Eb + { OP_HAS_MODRM, 0 }, // 92: SET{B,C,NAE} Eb + { OP_HAS_MODRM, 0 }, // 93: SET{AE,NB,NC} Eb + { OP_HAS_MODRM, 0 }, // 94: SET{E,Z} Eb + { OP_HAS_MODRM, 0 }, // 95: SET{NE,NZ} Eb + { OP_HAS_MODRM, 0 }, // 96: SET{BE,NA} Eb + { OP_HAS_MODRM, 0 }, // 97: SET{A,NBE} Eb + + { OP_HAS_MODRM, 0 }, // 98: SETS Eb + { 
OP_HAS_MODRM, 0 }, // 99: SETNS Eb + { OP_HAS_MODRM, 0 }, // 9A: SET{P,PE} Eb + { OP_HAS_MODRM, 0 }, // 9B: SET{NP,PO} Eb + { OP_HAS_MODRM, 0 }, // 9C: SET{L,NGE} Eb + { OP_HAS_MODRM, 0 }, // 9D: SET{NL,GE} Eb + { OP_HAS_MODRM, 0 }, // 9E: SET{LE,NG} Eb + { OP_HAS_MODRM, 0 }, // 9F: SET{NLE,G} Eb + + { OP_SPECIAL, 0 }, // A0: PUSH FS + { OP_SPECIAL, 0 }, // A1: POP FS + { OP_NEEDS_PATCH, 0 }, // A2: CPUID + { OP_HAS_MODRM, 0 }, // A3: BT Ev,Gv + { OP_HAS_MODRM|OP_HAS_IMM8, 0 }, // A4: SHLD Ev,Gv,Ib + { OP_HAS_MODRM, 0 }, // A5: SHLD Ev,Gv,CL + { OP_UNDEFINED, 0 }, // A6 + { OP_UNDEFINED, 0 }, // A7 + + { OP_SPECIAL, 0 }, // A8: PUSH GS + { OP_SPECIAL, 0 }, // A9: POP GS + { OP_SPECIAL, 0 }, // AA: RSM + { OP_HAS_MODRM, 0 }, // AB: BTS Ev,Gv + { OP_HAS_MODRM|OP_HAS_IMM8, 0 }, // AC: SHRD Ev,Gv,Ib + { OP_HAS_MODRM, 0 }, // AD: SHRD Ev,Gv,CL + { OP_GROUP(GRP_16), 0 }, // AE: group 16 + { OP_HAS_MODRM, 0 }, // AF: IMUL Gv,Ev + + { OP_HAS_MODRM, 0 }, // B0: CMPXCHG Eb,Gb + { OP_HAS_MODRM, 0 }, // B1: CMPXCHG Ev,Gv + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // B2: LSS Gz,Mp + { OP_HAS_MODRM, 0 }, // B3: BTR Ev,Gv + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // B4: LFS Gz,Mp + { OP_HAS_MODRM|OP_SPECIAL, 0 }, // B5: LGS Gz,Mp + { OP_HAS_MODRM, 0 }, // B6: MOVZX Gv,Eb + { OP_HAS_MODRM, 0 }, // B7: MOVZX Gv,Ew + + { OP_HAS_MODRM, PREF_F3 }, // B8: POPCNT Pq,Qq + + { OP_GROUP(GRP_11), 0 }, // B9: group 11 + { OP_GROUP(GRP_8), 0 }, // BA: group 8 + { OP_HAS_MODRM, 0 }, // BB: BTC Ev,Gv + { OP_HAS_MODRM, 0 }, // BC: BSF Gv,Ev + { OP_HAS_MODRM, 0 }, // BD: BSR Gv,Ev + { OP_HAS_MODRM, 0 }, // BE: MOVSX Gv,Eb + { OP_HAS_MODRM, 0 }, // BF: MOVSX Gv,Ew + + { OP_HAS_MODRM, 0 }, // C0: XADD Eb,Gb + { OP_HAS_MODRM, 0 }, // C1: XADD Ev,Gv + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_SSE_ALL }, // C2: CMPPS Vps, Wps, Ib + { OP_HAS_MODRM, PREF_NONE }, // C3: MOVNTI Md,Gd + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_NONE|PREF_66 }, // C4: PINSRW {Pq,Vo},Mw,Ib / {Pq,Vo},G[wd],Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_NONE|PREF_66 }, 
// C5: PEXTRW Gd,PRq,Ib / Gd,VRo,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_NONE|PREF_66 }, // C6: SHUFP{S,D} Vo,Wo,Ib + { OP_GROUP(GRP_9), 0 }, // C7: group 9 + + { 0, 0 }, // C8: BSWAP EAX + { 0, 0 }, // C9: BSWAP ECX + { 0, 0 }, // CA: BSWAP EDX + { 0, 0 }, // CB: BSWAP EBX + { 0, 0 }, // CC: BSWAP ESP + { 0, 0 }, // CD: BSWAP EBP + { 0, 0 }, // CE: BSWAP ESI + { 0, 0 }, // CF: BSWAP EDI + + { OP_HAS_MODRM, PREF_66|PREF_F2 }, // D0: ADDSUBP{D,S} Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D1: PSRLW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D2: PSRLD Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D3: PSRLQ Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D4: PADDQ Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D5: PMULLW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_F3|PREF_66|PREF_F2 }, // D6: MOVQ2DQ Vo,PRq / MOVQ {M,V}q,Vq / MOVDQ2Q Pq,VRq + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D7: PMOVMSKB Gd,PRq / Gd,VRo + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D8: PSUBUSB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // D9: PSUBUSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // DA: PMINUB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // DB: PAND Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // DC: PADDUSB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // DD: PADDUSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // DE: PMAXUB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // DF: PANDN Pq,Qq / Vo,Wo + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E0: PAVGB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E1: PSRAW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E2: PSRAD Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E3: PAVGW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E4: PMULHUW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E5: PMULHW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_F3|PREF_66|PREF_F2 }, // E6: CVTDQ2PD Vo,Wq / CVTTPD2DQ Vo,Wo / CVTPD2DQ Vo,Wo 
+ { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E7: MOVNTQ Mq,Pq / Mo,Vo + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E8: PSUBSB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // E9: PSUBSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // EA: PMINSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // EB: POR Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // EC: PADDSB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // ED: PADDSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // EE: PMAXSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // EF: PXOR Pq,Qq / Vo,Wo + +#ifdef EXTENDED_PATCHER + { OP_HAS_MODRM|OP_NEEDS_PATCH, PREF_F2 }, // F0: LDDQU Vo,Mo +#else + { OP_HAS_MODRM, PREF_F2 }, // F0: LDDQU Vo,Mo +#endif + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F1: PSLLW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F2: PSLLD Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F3: PSLLQ Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F4: PMULUDQ Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F5: PMADDWD Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F6: PSADBW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F7: MASKMOVQ Ppi,Qpi / MASKMOVDQU Vo,VRo + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F8: PSUBB Pq,Qq / Vo,Vw + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // F9: PSUBW Pq,Qq / Vo,Vw + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // FA: PSUBD Pq,Qq / Vo,Vw + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // FB: PSUBQ Pq,Qq / Vo,Vw + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // FC: PADDB Pq,Qq / Vo,Vw + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // FD: PADDW Pq,Qq / Vo,Vw + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // FE: PADDD Pq,Qq / Vo,Vw + + { OP_UNDEFINED, 0 } // FF +}; + +ext_opcode_t three_byte_38_table[256] = +{ + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 00: PSHUFB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 01: PHADDW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 02: PHADDD Pq,Qq / Vo,Wo + { 
OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 03: PHADDSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 04: PMADDUBSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 05: PHSUBW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 06: PHSUBD Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 07: PHSUBSW Pq,Qq / Vo,Wo + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 08: PSIGNB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 09: PSIGNW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 0A: PSIGND Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 0B: PMULHRSW Pq,Qq / Vo,Wo + + [0x0c ... 0x0f] = { OP_UNDEFINED, 0 }, // 0C to 0f: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM, PREF_66 }, // 10: PBLENDVB Pq,Qq,Rq + + [0x11 ... 0x13] = { OP_UNDEFINED, 0 }, // 11 to 13: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM, PREF_66 }, // 14: BLENDVPS Pq,Qq,Rq + { OP_HAS_MODRM, PREF_66 }, // 15: BLENDVPD Pq,Qq,Rq + + { OP_UNDEFINED, 0 }, // 16: undefined and non-SSSE3 opcode + + { OP_HAS_MODRM, PREF_66 }, // 17: PTEST Pq,Qq + + [0x18 ... 0x1b] = { OP_UNDEFINED, 0 }, // 18 to 1B: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 1C: PABSB Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 1D: PABSW Pq,Qq / Vo,Wo + { OP_HAS_MODRM, PREF_NONE|PREF_66 }, // 1E: PABSD Pq,Qq / Vo,Wo + + { OP_UNDEFINED, 0 }, // 1F: undefined and non-SSSE3 opcode + + { OP_HAS_MODRM, PREF_66 }, // 20: PMOVSXBW Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 21: PMOVSXBD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 22: PMOVSXBQ Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 23: PMOVSXWD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 24: PMOVSXWQ Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 25: PMOVSXDQ Pq,Qq + + [0x26 ... 
0x27] = { OP_UNDEFINED, 0 }, // 26 to 27: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM, PREF_66 }, // 28: PMULDQ Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 29: PCMPEQQ Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 2A: MOVNTDQA Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 2B: PACKUSDW Pq,Qq + + [0x2c ... 0x2f] = { OP_UNDEFINED, 0 }, // 2C to 2F: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM, PREF_66 }, // 30: PMOVZXBW Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 31: PMOVZXBD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 32: PMOVZXBQ Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 33: PMOVZXWD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 34: PMOVZXWQ Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 35: PMOVZXDQ Pq,Qq + + { OP_UNDEFINED, 0 }, // 36: undefined and non-SSSE3 opcode + + { OP_HAS_MODRM, PREF_66 }, // 37: PCMPGTQ Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 38: PMINSB Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 39: PMINSD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 3A: PMINUW Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 3B: PMINUD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 3C: PMAXSB Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 3D: PMAXSD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 3E: PMAXUW Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 3F: PMAXUD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 40: PMULLD Pq,Qq + { OP_HAS_MODRM, PREF_66 }, // 41: PHMINPOSUW Pq,Qq + + [0x42 ... 0xef] = { OP_UNDEFINED, 0 }, // 42 to EF: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM, PREF_F2 }, // F0: CRC32 Vo,Qq + { OP_HAS_MODRM, PREF_F2 }, // F1: CRC32 Vo,Qq + + [0xf2 ... 0xff] = { OP_UNDEFINED, 0 } // F2 to FF: undefined and non-SSSE3 opcodes +}; + +ext_opcode_t three_byte_3a_table[256] = +{ + [0x00 ... 
0x07] = { OP_UNDEFINED, 0 }, // 00 to 07: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 08: ROUNDPS Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 09: ROUNDPD Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 0A: ROUNDSS Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 0B: ROUNDSD Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 0C: BLENDPS Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 0D: BLENDPD Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 0E: PBLENDW Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_NONE|PREF_66 }, // 0F: PALIGNR Pq,Qq,Ib / Vo,Wo,Ib + + [0x10 ... 0x13] = { OP_UNDEFINED, 0 }, // 10 to 13: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 14: PEXTRB Vo,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 15: PEXTRW Vo,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 16: PEXTRD Vo,Qq,Ib / PEXTRQ Vo,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 17: EXTRACTPS Pq,Qq,Ib + + [0x18 ... 0x19] = { OP_UNDEFINED, 0 }, // 18 to 19: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 20: PINSRB Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 21: INSERTPS Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 22: PINSRD Pq,Qq,Ib / PINSRQ Pq,Wo,Ib + + [0x23 ... 0x3f] = { OP_UNDEFINED, 0 }, // 23 to 3F: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 40: DPPS Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 41: DPPD Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 42: MPSADBW Pq,Qq,Ib + + [0x43 ... 0x5f] = { OP_UNDEFINED, 0 }, // 43 to 5F: undefined and non-SSSE3 opcodes + + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 60: PCMPESTRM Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 61: PCMPESTRI Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 62: PCMPISTRM Pq,Qq,Ib + { OP_HAS_MODRM|OP_HAS_IMM8, PREF_66 }, // 63: PCMPISTRI Pq,Qq,Ib + + [0x64 ... 
0xff] = { OP_UNDEFINED, 0 } // 64 to FF: undefined and non-SSSE3 opcodes +}; + +/* get_insn_length: calculates the length of a single instruction + * + * arguments: insn: (in) pointer to instruction + * is_64bit: (in) specifies whether instruction set is x86-64 + * status: (out) returns STATUS_* flags (see disasm.h) + * returns: number of bytes in instruction + * INSN_INVALID if invalid + * INSN_UNSUPPORTED if unsupported + */ + +int32_t get_insn_length(uint8_t *insn, boolean_t is_64bit, uint8_t *status) +{ + uint32_t flag = 0; // instruction information + uint32_t prefix = 0; // all prefixes preceding opcode + uint8_t *eip = insn; // current location in instruction + uint8_t opcode; // last byte of opcode + + do { + flag &= ~(OP_PREFIX|OP_REX); + opcode = *eip++; + flag |= one_byte_table[opcode]; + if (!is_64bit) + flag &= ~OP_REX; + if (flag & (OP_PREFIX|OP_REX)) + prefix |= prefix_table[opcode]; + } while (flag & (OP_PREFIX|OP_REX)); + + if (flag & OP_TWOBYTE) { + ext_opcode_t *info; + opcode = *eip++; + info = &two_byte_table[opcode]; + flag |= info->flags; + if (flag & (OP_THREEBYTE_38|OP_THREEBYTE_3A)) { + ext_opcode_t *table; + if (flag & OP_THREEBYTE_38) + table = three_byte_38_table; + else if (flag & OP_THREEBYTE_3A) + table = three_byte_3a_table; + else // shut up optimizer (never reached) + return INSN_INVALID; + opcode = *eip++; + info = &table[opcode]; + flag |= info->flags; + } + if (!(prefix & ~(PREF_REX|PREF_REX_W))) + prefix |= PREF_NONE; + if (info->prefixes && !(info->prefixes & prefix)) + flag |= OP_UNDEFINED; + } + + if (flag & OP_GROUP_MASK) { + uint8_t reg = (*eip & 0x38) >> 3; + flag |= group_table[OP_GROUP_EXTRACT(flag)][reg]; + } + + if ((flag & OP_UNDEFINED) || (is_64bit && (flag & OP_IA32_ONLY))) + return INSN_INVALID; + + if (flag & OP_SPECIAL) { + /* detect certain instructions that are invalid unless in ring 0, unlikely to be used + * in 32-bit user code, or archaic (ie. bcd instructions). 
*/ + if (flag & (OP_THREEBYTE_38|OP_THREEBYTE_3A)) + return INSN_UNSUPPORTED; + else if (flag & OP_TWOBYTE) { + switch (opcode) { + case 0x00: // {SLDT,STR,LLDT,LTR,VERR,VERW} {Mw,Rv} + case 0x01: // {SGDT,SIDT,LGDT,LIDT} Ms, SMSW Mw, LMSW {Mw,Rv} + // INVLPG M (also: SWAPGS/RDTSCP) + case 0x02: // LAR Gv,Ew + case 0x03: // LSL Gv,Ew + case 0x06: // CLTS + case 0x07: // SYSRET + case 0x08: // INVD + case 0x09: // WBINVD + case 0x20: // MOV Rd,Cd + case 0x21: // MOV Rd,Dd + case 0x22: // MOV Cd,Rd + case 0x23: // MOV Dd,Rd + case 0x24: // MOV Rd,Td + case 0x26: // MOV Td,Rd + case 0x30: // WRMSR + case 0x32: // RDMSR + case 0x35: // SYSEXIT + case 0xa0: // PUSH FS + case 0xa1: // POP FS + case 0xa8: // PUSH GS + case 0xa9: // POP GS + case 0xaa: // RSM + case 0xb2: // LSS Gz,Mp + case 0xb4: // LFS Gz,Mp + case 0xb5: // LGS Gz,Mp + break; + default: + return INSN_UNSUPPORTED; + } + } else { + switch (opcode) { + case 0x06: // PUSH ES + case 0x07: // POP ES + case 0x0e: // PUSH CS + case 0x16: // PUSH SS + case 0x17: // POP SS + case 0x1e: // PUSH DS + case 0x1f: // POP DS + case 0x27: // DAA + case 0x2f: // DAS + case 0x37: // AAA + case 0x3f: // AAS + case 0xc4: // LES Gz,Mp + case 0xc5: // LDS Gz,Mp + case 0xcf: // IRET + case 0xd4: // AAM Ib + case 0xd5: // AAD Ib + case 0x63: // ARPL Ew,Gw (MOVSXD Gv,Ed for x86-64) + break; + case 0x00: // ADD Eb,Gb + /* a two-byte null instruction almost certainly means we're decoding garbade + * or that we have reached padding. */ + if (*eip == 0x00) { // ADD [eax],al + *status |= STATUS_PADDING; + return 1; + } + break; + case 0x90: // NOP / PAUSE + if (!(prefix & PREF_F3)) { // make sure not PAUSE + *status |= STATUS_PADDING; + return 1; + } + break; +#ifdef EXTENDED_PATCHER + case 0xff: // JMP Ev (FF/4) / JMP Mp (FF/5) + case 0xea: // JMP Ap + /* an absolute unconditional jump is often followed by garbage, so we inform + * the calling function that what follows is probably invalid. 
*/ + *status |= STATUS_REST; + break; +#endif + default: + return INSN_UNSUPPORTED; + } + } + } + + if (flag & OP_NEEDS_PATCH) + *status |= STATUS_NEEDS_PATCH; + + if (!(flag & OP_OPERANDS)) + return (uint32_t) (eip - insn); + + if (is_64bit && (flag & OP_CHECK_REX)) { + if (prefix & PREF_REX_W) + flag |= OP_HAS_IMM64; + else if (prefix & PREF_66) + flag |= OP_HAS_IMM16; + else + flag |= OP_HAS_IMM32; + } else if (flag & OP_CHECK_66) { + if (prefix & PREF_66) + flag |= OP_HAS_IMM16; + else + flag |= OP_HAS_IMM32; + } else if (flag & OP_CHECK_67) { + if (is_64bit) { + if (prefix & PREF_67) + flag |= OP_HAS_IMM32; + else + flag |= OP_HAS_IMM64; + } else { + if (prefix & PREF_67) + flag |= OP_HAS_IMM16; + else + flag |= OP_HAS_IMM32; + } + } + + if (flag & OP_HAS_MODRM) { + uint32_t modrm = *eip++; + uint32_t mod = modrm >> 6; + uint32_t rm = modrm & 0x7; + + if (prefix & PREF_67) { + if (mod == 1) + flag |= OP_HAS_DISP8; + else if (mod == 2) + flag |= OP_HAS_DISP16; + else if (mod == 0 && rm == 6) + flag |= OP_HAS_DISP16; + } else { + if (mod == 1) + flag |= OP_HAS_DISP8; + else if (mod == 2) + flag |= OP_HAS_DISP32; + else if (mod == 0 && rm == 5) + flag |= OP_HAS_DISP32; + if (mod < 3 && rm == 4) { + uint32_t sib_base = *eip++ & 0x7; + if (mod == 0 && sib_base == 5) + flag |= OP_HAS_DISP32; + } + } + } + + if (flag & OP_HAS_IMM8) + eip++; + if (flag & OP_HAS_IMM16) + eip += 2; + if (flag & OP_HAS_IMM32) + eip += 4; + if (flag & OP_HAS_IMM64) + eip += 8; + if (flag & OP_HAS_DISP8) + eip++; + if (flag & OP_HAS_DISP16) + eip += 2; + if (flag & OP_HAS_DISP32) + eip += 4; + + return (uint32_t) (eip - insn); +} + +/* old sysenter_trap: + * +0 5a popl %edx [returned by check_sysenter_trap] + * +1 89e1 movl %esp,%ecx + * +3 0f34 sysenter [arg for check_sysenter_trap] + * +5 0f1f00 nopl (%eax) + * new sysenter_trap: + * +0 59 popl %ecx + * +1 cdfc int $0xfc + * +3 51 pushl %ecx + * +4 c3 ret + * +5 0f1f00 nopl (%eax) + */ + +uint8_t *check_sysenter_trap(uint8_t *insn) +{ + 
uint32_t peek_back, peek_ahead; + if (*(uint16_t *) insn != 0x340f) + return (uint8_t *) -1; + peek_back = *(uint32_t *) (insn - 4); + if ((peek_back & 0xffffff00) != 0xe1895a00) + return (uint8_t *) -1; + peek_ahead = *(uint32_t *) (insn + 2); + if ((peek_ahead & 0x00ffffff) != 0x00001f0f) + return (uint8_t *) -1; + return (insn - 3); +} + +const uint8_t new_sysenter_trap[8] = { 0x59, 0xcd, 0xfc, 0x51, 0xc3, 0x0f, 0x1f, 0x00 }; + +void patch_sysenter_trap(uint8_t *begin) +{ + *(uint32_t *) begin = *(const uint32_t *) new_sysenter_trap; + *(uint32_t *) (begin + 4) = *(const uint32_t *) (new_sysenter_trap + 4); +} + +boolean_t patch_insn(uint8_t *insn, boolean_t verbose, boolean_t is_64bit) +{ +#ifdef EXTENDED_PATCHER + uint32_t opcode = *(uint32_t *) insn; + + if ((KERN_patcherOpts & OPT_PATCHER_FISTTP) && ((insn[0] & 0xf0) == 0xd0) && + (((insn[1] >> 3) & 7) == 1)) { + switch (insn[0]) { + case 0xdf: /* word */ + case 0xdb: /* dword */ + insn[1] |= (3 << 3); + break; + case 0xdd: /* qword */ + insn[0] = 0xdf; + insn[1] |= (7 << 3); + break; + default: + return FALSE; + } + if (verbose) + printf("(patching fisttp to fistp)\n"); + return TRUE; + } + + if ((KERN_patcherOpts & OPT_PATCHER_LDDQU) && ((opcode & 0x00ffffff) == LDDQU)) { + if (verbose) + printf("(patching lddqu to movdqu)\n"); + opcode &= 0xff000000; /* clear opcode, leave operand */ + opcode |= 0x006f0ff3; /* patch with movdqu */ + *(uint32_t *) insn = opcode; + return TRUE; + } +#endif + + if ((KERN_patcherOpts & OPT_PATCHER_CPUID) && (*(uint16_t *) insn == CPUID)) { + if (verbose) + printf("(patching cpuid to int 0xfb)\n"); + *(uint16_t *) insn = 0xfbcd; /* int 0xfb */ + return TRUE; + } + + if (!is_64bit && (KERN_patcherOpts & OPT_PATCHER_SYSENTER) && (*(uint16_t *) insn == SYSENTER)) { + uint8_t *begin = check_sysenter_trap(insn); + if (begin == (uint8_t *) -1) + return FALSE; + if (verbose) + printf("(patching sysenter_trap)\n"); + patch_sysenter_trap(begin); + return TRUE; + } + + return FALSE; +} 
+ +uint32_t scan_text_section(uint8_t *start, uint64_t size, uint64_t text_addr, + boolean_t should_patch, boolean_t abi_is_64, boolean_t verbose, + uint32_t *num_patches_out) +{ + int32_t res; + uint8_t *insn, *end, *last_bad; + uint32_t num_bad, num_patches; + + insn = start; + end = start + size; + last_bad = NULL; + num_bad = 0; + num_patches = 0; + + if (verbose) { + uint64_t addr = text_addr; + for (res = 0; insn < end; insn += res, addr += res) { + uint8_t status = 0; + res = get_insn_length(insn, abi_is_64, &status); + if (res == INSN_INVALID) { +#ifdef EXTRA_VERBOSE + printf("%08llx: (bad)\n", addr); +#endif + res = 1; + last_bad = insn; + num_bad++; + } else if (res == INSN_UNSUPPORTED) { + printf("%08llx: (unsupported)\n", addr); + res = 1; + last_bad = insn; + num_bad++; + } else if (status) { + if (status & STATUS_PADDING) { + uint32_t n; + for (n = 1; (insn + n) < end; n++) + if (insn[n] != insn[0]) + break; +#ifdef EXTRA_VERBOSE + printf("%08llx: (%d bytes padding)\n", addr, n); +#endif + res = n; + continue; + } +#ifdef EXTENDED_PATCHER + if (status & STATUS_REST) { + last_bad = insn; +# ifdef EXTRA_VERBOSE + printf("%08llx: (will rest)\n", addr); +# endif + } +#endif + if (!(status & STATUS_NEEDS_PATCH)) + continue; + printf("%08llx: ", addr); + if (!should_patch || ((insn - last_bad) <= REST_SIZE)) { + printf("(skipped patch)\n"); + continue; + } + if (!patch_insn(insn, verbose, abi_is_64)) + printf("(unrecognized patch)\n"); + else + num_patches++; + } + } + } else { + for (res = 0; insn < end; insn += res) { + uint8_t status = 0; + res = get_insn_length(insn, abi_is_64, &status); + if (res <= 0) { /* INSN_INVALID or INSN_UNSUPPORTED */ + res = 1; + last_bad = insn; + num_bad++; + } else { +#ifdef EXTENDED_PATCHER + if (status & (STATUS_REST|STATUS_PADDING)) { + if (status & STATUS_PADDING) { + uint32_t n; + for (n = 1; (insn + n) < end; n++) + if (insn[n] != insn[0]) + break; + res = n; + continue; + } else + last_bad = insn; + } +#else + if 
(status & STATUS_PADDING) { + uint32_t n; + for (n = 1; (insn + n) < end; n++) + if (insn[n] != insn[0]) + break; + res = n; + continue; + } +#endif + if ((status & STATUS_NEEDS_PATCH) && should_patch && + ((insn - last_bad) > REST_SIZE) && + patch_insn(insn, verbose, abi_is_64)) + num_patches++; + } + } + } + + *num_patches_out = num_patches; + + return num_bad; +} + +/* segment loading routines (for patching). */ + +#define DEFINE_GETSEG(x) \ +\ +struct segment_command##x *getsegforpatch##x(struct mach_header##x *header, \ +const char *seg_name) \ +{ \ +struct segment_command##x *sgp; \ +uint32_t i; \ +\ +sgp = (struct segment_command##x *) ((char *) header + sizeof (struct mach_header##x)); \ +for (i = 0; i < header->ncmds; i++) { \ +if (sgp->cmd == LC_SEGMENT##x && !strncmp(sgp->segname, seg_name, \ +sizeof (sgp->segname))) \ +return sgp; \ +sgp = (struct segment_command##x *) ((char *) sgp + sgp->cmdsize); \ +} \ +\ +return NULL; \ +} + +DEFINE_GETSEG() +DEFINE_GETSEG(_64) + +#define DEFINE_GETSECT(x) \ +\ +struct section##x *getsectforpatch##x(struct mach_header##x *header, \ +const char *segname, const char *sectname) \ +{ \ +struct segment_command##x *sgp; \ +struct section##x *sp; \ +uint32_t i; \ +\ +sgp = getsegforpatch##x(header, segname); \ +if (!sgp) \ +return NULL; \ +\ +sp = (struct section##x *) ((char *) sgp + sizeof (struct segment_command##x)); \ +for (i = 0; i < sgp->nsects; i++){ \ +if (!strncmp(sp->sectname, sectname, sizeof (sp->sectname)) && \ +!strncmp(sp->segname, segname, sizeof (sp->segname))) \ +return sp; \ +sp = (struct section##x *) ((char *) sp + sizeof (struct section##x)); \ +} \ +\ +return NULL; \ +} + +DEFINE_GETSECT() +DEFINE_GETSECT(_64) + +/* note: the map_addr and map_size arguments are used only for error checking. 
*/ + +kern_return_t patch_text_segment(uint8_t *addr, __unused mach_vm_offset_t map_addr, + mach_vm_size_t map_size, boolean_t abi_is_64, boolean_t seg_is_64, + boolean_t verbose, boolean_t *bypass, uint32_t *num_patches_out, + uint32_t *num_bad_out) +{ + uint64_t text_addr, text_size; + uint32_t text_offset; + uint8_t *text_data; + uint64_t tmp_size; + uint32_t num_patches, num_bad; + + *bypass = FALSE; + + if (seg_is_64) { + struct section_64 *text_sect; + text_sect = getsectforpatch_64((struct mach_header_64 *) addr, "__TEXT", "__text"); + if (!text_sect) { + if (verbose) + printf("getsectforpatch_64 failed (text segment appears " + "to contain garbage, bypassing patcher)\n"); + *bypass = TRUE; + return KERN_FAILURE; + } + text_addr = text_sect->addr; + text_size = text_sect->size; + text_offset = text_sect->offset; + } else { + struct section *text_sect; + text_sect = getsectforpatch((struct mach_header *) addr, "__TEXT", "__text"); + if (!text_sect) { + if (verbose) + printf("getsectforpatch failed (text segment appears " + "to contain garbage, bypassing patcher)\n"); + *bypass = TRUE; + return KERN_FAILURE; + } + text_addr = (uint64_t) text_sect->addr; + text_size = (uint64_t) text_sect->size; + text_offset = text_sect->offset; + } + + tmp_size = (uint64_t) text_offset + text_size; +#ifdef FIXME + /* xxx: this check only makes sense if map_addr is guaranteed to be vmaddr */ + if ((text_addr - map_addr) > map_size) { + printf("text section address not within mapped range\n"); + return KERN_FAILURE; + } else +#endif + if (tmp_size > map_size) { + printf("text section offset and size greater than mapping size\n"); + return KERN_FAILURE; + } else if ((tmp_size + 16) > map_size) { + /* take care not to access anything beyond the mapped range if the text + * section ends within 16 bytes (maximum instruction length is 15 bytes) + * of the end */ + text_size -= 16 - (map_size - tmp_size); + } + + text_data = (uint8_t *) addr + text_offset; + + if (verbose) { + 
uint32_t n; + for (n = 0; n < 16; n++) + printf("%02x ", text_data[n]); + printf("\n"); + } + + /* before attempting to patch anything, scan through some of the section and verify + * that what we are attempting to patch is not total garbage. */ + num_bad = scan_text_section(text_data, min(text_size, PRESCAN_SIZE), text_addr, FALSE, + abi_is_64, verbose, &num_patches); + if (verbose) + printf("prescan found %d bad instructions\n", num_bad); + if (num_bad >= PRESCAN_MAX_BAD) { + if (verbose) + printf("text section appears to contain garbage, bypassing patcher\n"); + *bypass = TRUE; + return KERN_FAILURE; + } + + /* now that we have decided the text section contains valid code, scan through the + * whole section and perform the actual patching. */ + num_bad = scan_text_section(text_data, text_size, text_addr, TRUE, abi_is_64, verbose, + &num_patches); + if (verbose) + printf("complete scan found %d bad instructions\n", num_bad); + + *num_patches_out = num_patches; + *num_bad_out = num_bad; + + return KERN_SUCCESS; +} diff -Naur xnu-1504.9.26.orig/bsd/kern/disasm.h xnu-1504.9.26/bsd/kern/disasm.h --- xnu-1504.9.26.orig/bsd/kern/disasm.h 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/disasm.h 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,62 @@ +/* + * instruction length decoder (written by kaitek, modified by mercurysquad) + * voodoo xnu kernel + */ + +#ifndef _DISASM_H +#define _DISASM_H + +#include + +/* EXTENDED_PATCHER enables FISTTP/LDDQU patching support */ +#undef EXTENDED_PATCHER +/* EXTRA_VERBOSE causes increased verbosity when OPT_PATCHER_DEBUG is set */ +#undef EXTRA_VERBOSE + +#ifdef EXTENDED_PATCHER +# define LDDQU 0xf00ff2 +#endif +#define CPUID 0xa20f +#define SYSENTER 0x340f + +/* INSN_* are possible return codes from get_insn_length if length can't be found */ +#define INSN_INVALID 0 +#define INSN_UNSUPPORTED (-1) + +/* STATUS_* are possible status codes written bit-packed to the location specified + * by the status argument to 
get_insn_length */ +#define STATUS_NEEDS_PATCH (1 << 0) +#define STATUS_PADDING (1 << 1) +#ifdef EXTENDED_PATCHER +# define STATUS_REST (1 << 2) +#endif + +struct segment_command *getsegforpatch(struct mach_header *header, const char *seg_name); +struct segment_command_64 *getsegforpatch_64(struct mach_header_64 *header, const char *seg_name); + +struct section *getsectforpatch(struct mach_header *header, const char *segname, const char *sectname); +struct section_64 *getsectforpatch_64(struct mach_header_64 *header, const char *segname, const char *sectname); + +int32_t get_insn_length(uint8_t *insn, boolean_t is_64bit, uint8_t *status); + +boolean_t patch_insn(uint8_t *insn, boolean_t verbose, boolean_t is_64bit); + +uint32_t scan_text_section(uint8_t *start, uint64_t size, uint64_t text_addr, + boolean_t should_patch, boolean_t abi_is_64, boolean_t verbose, + uint32_t *num_patches_out); + +kern_return_t patch_text_segment(uint8_t *addr, mach_vm_offset_t map_addr, + mach_vm_size_t map_size, boolean_t abi_is_64, boolean_t seg_is_64, + boolean_t verbose, boolean_t *bypass, uint32_t *num_patches_out, + uint32_t *num_bad_out); + +/* magic numbers fine-tuned for accurate disassembly; don't mess with these unless + * you really know what you are doing. */ +#define REST_SIZE 25 +#define PRESCAN_SIZE 1000 +#define PRESCAN_MAX_BAD 20 + +uint8_t *check_sysenter_trap(uint8_t *insn); +void patch_sysenter_trap(uint8_t *begin); + +#endif diff -Naur xnu-1504.9.26.orig/bsd/kern/dyld.c xnu-1504.9.26/bsd/kern/dyld.c --- xnu-1504.9.26.orig/bsd/kern/dyld.c 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/dyld.c 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,6 @@ +#include + +/* kaitek: custom dyld support is automatically disabled if dyld_size is zero. 
*/ + +const uint8_t dyld_binary[] = { 0x00 }; +const uint32_t dyld_size = 0; diff -Naur xnu-1504.9.26.orig/bsd/kern/dyld.h xnu-1504.9.26/bsd/kern/dyld.h --- xnu-1504.9.26.orig/bsd/kern/dyld.h 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/dyld.h 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,9 @@ +#ifndef _DYLD_H +#define _DYLD_H + +#include + +extern const uint32_t dyld_size; +extern const uint8_t dyld_binary[]; + +#endif diff -Naur xnu-1504.9.26.orig/bsd/kern/kern_exec.c xnu-1504.9.26/bsd/kern/kern_exec.c --- xnu-1504.9.26.orig/bsd/kern/kern_exec.c 2011-01-06 11:45:26.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/kern_exec.c 2011-01-09 16:00:20.000000000 -0500 @@ -185,6 +185,7 @@ #include +#include /* * SIZE_MAXPTR The maximum size of a user space pointer, in bytes @@ -3129,6 +3130,9 @@ */ set_security_token(p); + if (KERN_patcherOpts & OPT_PATCHER_DEBUG) + dump_patcher_opts(); + error = execve(p,&init_exec_args,retval); if (error) panic("Process 1 exec of %s failed, errno %d\n", diff -Naur xnu-1504.9.26.orig/bsd/kern/kern_mib.c xnu-1504.9.26/bsd/kern/kern_mib.c --- xnu-1504.9.26.orig/bsd/kern/kern_mib.c 2011-01-06 11:45:26.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/kern_mib.c 2011-01-09 16:00:20.000000000 -0500 @@ -117,7 +117,13 @@ #include /* for cpuid_info() */ #endif - +#ifndef MACMODEL +#define MACMODEL "MacPro2,1" +#endif /* MACMODEL */ + +#ifndef MACMODELLEN +#define MACMODELLEN 10 +#endif /* MACMODELLEN */ #ifndef MAX #define MAX(a,b) (a >= b ? 
a : b) @@ -174,6 +180,7 @@ int arg2, struct sysctl_req *req) { char dummy[65]; + char dummylen; int epochTemp; ml_cpu_info_t cpu_info; int val, doquad; @@ -273,11 +280,31 @@ dummy[64] = 0; return(SYSCTL_OUT(req, dummy, strlen(dummy) + 1)); case HW_MODEL: - bzero(dummy, sizeof(dummy)); - if(!PEGetModelName(dummy,64)) - return(EINVAL); - dummy[64] = 0; - return(SYSCTL_OUT(req, dummy, strlen(dummy) + 1)); + if (PE_parse_boot_argn("macmodel", &dummy, 64)) + { + dummylen = strlen(dummy); + + if (dummylen >= 64) + { + dummylen = 63; + } + + if (dummylen > 1) + { + dummy[dummylen + 1] = 0; + printf("\"macmodel\" boot argument found, Mac model set as \"%s\"\n", dummy); + } else { + //strncpy(dummy, MACMODEL, (MACMODELLEN + 1)); + strlcpy(dummy, MACMODEL, MACMODELLEN); + printf("\"macmodel\" boot argument not found, Mac model set as \"%s\"\n", MACMODEL); + } + } else { + bzero(dummy, sizeof(dummy)); + if(!PEGetModelName(dummy,64)) + return(EINVAL); + } + dummy[64] = 0; + return(SYSCTL_OUT(req, dummy, strlen(dummy) + 1)); case HW_USERMEM: { int usermem = mem_size - vm_page_wire_count * page_size; @@ -445,6 +472,15 @@ #undef x86_64 SYSCTL_INT(_hw_optional, OID_AUTO, x86_64, CTLFLAG_RD | CTLFLAG_KERN, &x86_64_flag, 0, ""); SYSCTL_INT(_hw_optional, OID_AUTO, aes, CTLFLAG_RD | CTLFLAG_KERN, &aes_flag, 0, ""); +/* mercurysquad: declare sysctl key to enable/disable opcode patching */ +uint32_t KERN_patcherOpts = 0; /* disabled by default, enabled in i386_init based on the cpu */ +/* this value is bit-packed; see the OPT_PATCHER_* definitions in i386/patcher_opts.h */ +SYSCTL_INT(_hw_optional, OID_AUTO, patcher_opts, CTLFLAG_RW | CTLFLAG_SECURE, &KERN_patcherOpts, 0, +# ifdef EXTENDED_PATCHER + "Control CPUID emulation for AMD and LDDQU/FISTTP emulation for SSE2 processors"); +# else + "Control CPUID emulation for AMD processors"); +# endif #endif /* __ppc__ */ /* @@ -564,7 +600,7 @@ cpufamily = CPUFAMILY_POWERPC_G5; break; default: - cpufamily = CPUFAMILY_UNKNOWN; + cpufamily = 
CPUFAMILY_INTEL_6_14; } ml_cpu_info_t cpu_info; @@ -577,20 +613,40 @@ { hinfo.max_cpus = 1; } - - /* hw.cachesize */ - cachesize[0] = max_mem; - cachesize[1] = cpu_info.l1_dcache_size; - cachesize[2] = cpu_info.l2_settings ? cpu_info.l2_cache_size : 0; - cachesize[3] = cpu_info.l3_settings ? cpu_info.l3_cache_size : 0; - cachesize[4] = 0; - - /* hw.cacheconfig */ - cacheconfig[0] = hinfo.max_cpus; - cacheconfig[1] = 1; - cacheconfig[2] = cachesize[2] ? 1 : 0; - cacheconfig[3] = cachesize[3] ? 1 : 0; - cacheconfig[4] = 0; + if (IsIntelCPU() && + /* Pentium M or higher */ + (( (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) && (cpuid_info()->cpuid_model >= 14) ) || + /* Pentium 4 HT model 3 or higher */ + ( (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_4) && (cpuid_info()->cpuid_model >= 3) ) )) + { + /* Use stock code */ + cacheconfig[0] = ml_cpu_cache_sharing(0); + cacheconfig[1] = ml_cpu_cache_sharing(1); + cacheconfig[2] = ml_cpu_cache_sharing(2); + cacheconfig[3] = ml_cpu_cache_sharing(3); + cacheconfig[4] = 0; + + /* hw.cachesize */ + cachesize[0] = ml_cpu_cache_size(0); + cachesize[1] = ml_cpu_cache_size(1); + cachesize[2] = ml_cpu_cache_size(2); + cachesize[3] = ml_cpu_cache_size(3); + cachesize[4] = 0; + } else { + /* Other CPUs, we just use what we calculated in cpuid.c */ + cacheconfig[0] = ml_cpu_cache_sharing(0); + cacheconfig[1] = cpuid_info()->cache_sharing[L1D]; + cacheconfig[2] = cpuid_info()->cache_sharing[L2U]; + cacheconfig[3] = cpuid_info()->cache_sharing[L3U]; + cacheconfig[4] = 0; + + /* hw.cachesize */ + cachesize[0] = ml_cpu_cache_size(0); + cachesize[1] = cpuid_info()->cache_size[L1D]; + cachesize[2] = cpuid_info()->cache_size[L2U]; + cachesize[3] = cpuid_info()->cache_size[L3U]; + cachesize[4] = 0; + }; /* hw.packages */ if (cpusubtype == CPU_SUBTYPE_POWERPC_970 && @@ -614,20 +670,41 @@ /* hw.cpufamily */ cpufamily = cpuid_cpufamily(); - /* hw.cacheconfig */ - cacheconfig[0] = ml_cpu_cache_sharing(0); - cacheconfig[1] = 
ml_cpu_cache_sharing(1); - cacheconfig[2] = ml_cpu_cache_sharing(2); - cacheconfig[3] = ml_cpu_cache_sharing(3); - cacheconfig[4] = 0; - - /* hw.cachesize */ - cachesize[0] = ml_cpu_cache_size(0); - cachesize[1] = ml_cpu_cache_size(1); - cachesize[2] = ml_cpu_cache_size(2); - cachesize[3] = ml_cpu_cache_size(3); - cachesize[4] = 0; - + if (IsIntelCPU() && + /* Pentium M or higher */ + (( (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) && (cpuid_info()->cpuid_model >= 14) ) || + /* Pentium 4 HT model 3 or higher */ + ( (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_4) && (cpuid_info()->cpuid_model >= 3) ) )) + { + /* Use stock code */ + cacheconfig[0] = ml_cpu_cache_sharing(0); + cacheconfig[1] = ml_cpu_cache_sharing(1); + cacheconfig[2] = ml_cpu_cache_sharing(2); + cacheconfig[3] = ml_cpu_cache_sharing(3); + cacheconfig[4] = 0; + + /* hw.cachesize */ + cachesize[0] = ml_cpu_cache_size(0); + cachesize[1] = ml_cpu_cache_size(1); + cachesize[2] = ml_cpu_cache_size(2); + cachesize[3] = ml_cpu_cache_size(3); + cachesize[4] = 0; + } else { + /* Other CPUs, we just use what we calculated in cpuid.c */ + cacheconfig[0] = ml_cpu_cache_sharing(0); + cacheconfig[1] = cpuid_info()->cache_sharing[L1D]; + cacheconfig[2] = cpuid_info()->cache_sharing[L2U]; + cacheconfig[3] = cpuid_info()->cache_sharing[L3U]; + cacheconfig[4] = 0; + + /* hw.cachesize */ + cachesize[0] = ml_cpu_cache_size(0); + cachesize[1] = cpuid_info()->cache_size[L1D]; + cachesize[2] = cpuid_info()->cache_size[L2U]; + cachesize[3] = cpuid_info()->cache_size[L3U]; + cachesize[4] = 0; + }; + /* hw.packages */ packages = roundup(ml_cpu_cache_sharing(0), cpuid_info()->thread_count) / cpuid_info()->thread_count; diff -Naur xnu-1504.9.26.orig/bsd/kern/mach_fat.c xnu-1504.9.26/bsd/kern/mach_fat.c --- xnu-1504.9.26.orig/bsd/kern/mach_fat.c 2011-01-06 11:45:26.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/mach_fat.c 2011-01-09 16:00:20.000000000 -0500 @@ -56,7 +56,7 @@ * Returns: KERN_SUCCESS: Valid architecture 
found. * KERN_FAILURE: No valid architecture found. **********************************************************************/ -static load_return_t +load_return_t fatfile_getarch2( #if 0 struct vnode *vp, diff -Naur xnu-1504.9.26.orig/bsd/kern/mach_fat.h xnu-1504.9.26/bsd/kern/mach_fat.h --- xnu-1504.9.26.orig/bsd/kern/mach_fat.h 2011-01-06 11:45:26.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/mach_fat.h 2011-01-09 16:00:20.000000000 -0500 @@ -40,5 +40,7 @@ struct fat_arch *archret); load_return_t fatfile_getarch_with_bits(struct vnode *vp, integer_t archbits, vm_offset_t data_ptr, struct fat_arch *archret); +load_return_t fatfile_getarch2(struct vnode *vp, vm_offset_t data_ptr, cpu_type_t req_cpu_type, + cpu_type_t mask_bits, struct fat_arch *archret); #endif /* _BSD_KERN_MACH_FAT_H_ */ diff -Naur xnu-1504.9.26.orig/bsd/kern/mach_loader.c xnu-1504.9.26/bsd/kern/mach_loader.c --- xnu-1504.9.26.orig/bsd/kern/mach_loader.c 2011-01-06 11:45:26.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/mach_loader.c 2011-01-09 16:00:20.000000000 -0500 @@ -35,6 +35,9 @@ * * 21-Jul-88 Avadis Tevanian, Jr. (avie) at NeXT * Started. 
+ * + * 22-Sep-2008 kaitek (voodoo xnu kernel) + * Modifications made to support on-the-fly opcode patching */ #include @@ -49,6 +52,7 @@ #include #include #include +#include #include #include /* vm_allocate() */ @@ -56,6 +60,8 @@ #include #include #include +#include +#include #include #include @@ -66,8 +72,12 @@ #include #include #include +#include #include #include +#include +#include +#include #include #include @@ -79,6 +89,37 @@ #include #include +#include +#include +#include + +#include + +#include +#include + +#define MTS_MODE_DEFAULT 1 +#define MTS_MODE_FIXED 2 +#define MTS_MODE_ANYWHERE 3 + +/* kaitek: following xnu layering rules it isn't quite kosher to deal with vm_objects at this + * level, but for our purposes there's nothing technically wrong with it */ + +extern kern_return_t vm_map_enter( + vm_map_t map, + vm_map_offset_t *address, + vm_map_size_t size, + vm_map_offset_t mask, + int flags, + vm_object_t object, + vm_object_offset_t offset, + boolean_t needs_copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance +); + +extern void ipc_port_release(ipc_port_t); /* * XXX vm/pmap.h should not treat these prototypes as MACH_KERNEL_PRIVATE @@ -123,12 +164,118 @@ .uuid = { 0 } }; +typedef struct custom_binary { + ipc_port_t object; + vm_offset_t offset; + vm_size_t size; +} *custom_binary_t; + +typedef struct macho_data { + vnode_t vp; + custom_binary_t custom; + const char *name; + void *cmds; + size_t cmds_size; +} *macho_data_t; + +static void check_macho_data( + macho_data_t data +); + +static void free_macho_data( + macho_data_t data +); + +static void check_custom_binary( + custom_binary_t custom +); + +__unused static void free_custom_binary( + custom_binary_t custom +); + +static kern_return_t create_custom_binary( + const void *data, + vm_size_t size, + custom_binary_t *custom +); + +static kern_return_t read_from_custom_binary( + custom_binary_t custom, + vm_offset_t offset, + vm_size_t size, + void *dest +); + 
+/* kaitek: prototypes for new segment loader related functions */ + +static kern_return_t patch_exec_obj_segment( + vm_object_t exec_obj, + mach_vm_offset_t map_offset, + mach_vm_size_t map_size, + mach_vm_offset_t map_addr, + boolean_t abi_is_64, + boolean_t seg_is_64, + uint32_t fileoff, + uint32_t filesize, + boolean_t is_encrypted, + boolean_t verbose, + boolean_t dont_patch, + uint32_t *num_patches_out, + boolean_t *bypass +); + +kern_return_t map_segment( + vnode_t vp, + vm_map_t map, + mach_vm_offset_t map_offset, + mach_vm_size_t map_size, + mach_vm_offset_t map_addr, + boolean_t seg_is_text, + boolean_t abi_is_64, + boolean_t seg_is_64, + uint32_t fileoff, + uint32_t filesize, + vm_prot_t maxprot, + vm_prot_t initprot, + boolean_t is_encrypted, + boolean_t verbose, + boolean_t dont_patch +); + +kern_return_t patch_custom_binary( + custom_binary_t custom, + integer_t cputype, + boolean_t verbose +); + +static load_return_t load_segment_patched( + macho_data_t data, + off_t /* int64_t */ pager_offset, + off_t /* int64_t */ macho_size, + vm_map_t map, + load_result_t *result, + boolean_t abi_is_64, + boolean_t seg_is_64, + uint64_t vmaddr, + uint64_t vmsize, + uint32_t fileoff, + uint32_t filesize, + vm_prot_t maxprot, + vm_prot_t initprot, + uint32_t flags, + char *segname, + boolean_t dont_patch, + boolean_t anywhere, + mach_vm_offset_t *user_addr +); + /* * Prototypes of static functions. 
*/ static load_return_t parse_machfile( - struct vnode *vp, + macho_data_t data, vm_map_t map, thread_t thread, struct mach_header *header, @@ -213,7 +360,8 @@ thread_t thread, int depth, load_result_t *result, - boolean_t is_64bit + boolean_t is_64bit, + boolean_t force_std ); static load_return_t @@ -226,6 +374,15 @@ struct vnode **vpp ); +static load_return_t +parse_custom_binary( + custom_binary_t custom, + integer_t archbits, + struct mach_header *mach_header, + off_t *file_offset, + off_t *macho_size +); + static inline void widen_segment_command(const struct segment_command *scp32, struct segment_command_64 *scp) @@ -291,6 +448,7 @@ load_result_t myresult; load_return_t lret; boolean_t create_map = FALSE; + struct macho_data data; int spawn = (imgp->ip_flags & IMGPF_SPAWN); task_t task = current_task(); @@ -331,8 +489,15 @@ *result = load_result_null; - lret = parse_machfile(vp, map, thread, header, file_offset, macho_size, + data.vp = vp; + data.custom = NULL; + data.name = vp->v_name; + data.cmds = NULL; + data.cmds_size = 0; + + lret = parse_machfile(&data, map, thread, header, file_offset, macho_size, 0, result); + free_macho_data(&data); if (lret != LOAD_SUCCESS) { if (create_map) { @@ -402,7 +567,7 @@ static load_return_t parse_machfile( - struct vnode *vp, + macho_data_t data, vm_map_t map, thread_t thread, struct mach_header *header, @@ -420,18 +585,33 @@ void * control; load_return_t ret = LOAD_SUCCESS; caddr_t addr; - void * kl_addr; - vm_size_t size,kl_size; + vm_size_t size; size_t offset; size_t oldoffset; /* for overflow check */ int pass; proc_t p = current_proc(); /* XXXX */ int error; - int resid=0; task_t task; size_t mach_header_sz = sizeof(struct mach_header); boolean_t abi64; + boolean_t verbose, patcher_enabled, force_patch; + boolean_t cs_enabled; boolean_t got_code_signatures = FALSE; + boolean_t apple_cs = FALSE; + uint32_t boot_arg; + boolean_t needs_stock_dyld = !strcmp(data->name, "translate"); + + check_macho_data(data); + + /* 
todo: we are assuming that patcher_opts will not change for the duration of the + * mach-o loading process. it would be better to cache the value and pass it as an + * argument where necessary from here on. */ + + verbose = (KERN_patcherOpts & OPT_PATCHER_DEBUG) != 0; + patcher_enabled = (KERN_patcherOpts & OPT_PATCHER_ALL) != 0; + force_patch = (KERN_patcherOpts & OPT_PATCHER_FORCE) != 0; + + cs_enabled = data->vp != NULL; if (header->magic == MH_MAGIC_64 || header->magic == MH_CIGAM_64) { @@ -458,7 +638,12 @@ return(LOAD_BADARCH); abi64 = ((header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64); - + + if (verbose) + printf("loading %s (custom: %s, abi64: %s)\n", data->name, + (data->custom ? "true" : "false"), + (abi64 ? "true" : "false")); + switch (header->filetype) { case MH_OBJECT: @@ -489,7 +674,7 @@ /* * Get the pager for the file. */ - control = ubc_getobject(vp, UBC_FLAGS_NONE); + control = ubc_getobject(data->vp, UBC_FLAGS_NONE); /* * Map portion that must be accessible directly into @@ -504,29 +689,51 @@ size = round_page(mach_header_sz + header->sizeofcmds); if (size <= 0) return(LOAD_BADMACHO); + if (data->cmds_size) + ASSERT(data->cmds_size == size); /* * Map the load commands into kernel memory. 
*/ addr = 0; - kl_size = size; - kl_addr = kalloc(size); - addr = (caddr_t)kl_addr; - if (addr == NULL) - return(LOAD_NOSPACE); - - error = vn_rdwr(UIO_READ, vp, addr, size, file_offset, - UIO_SYSSPACE, 0, kauth_cred_get(), &resid, p); - if (error) { - if (kl_addr ) - kfree(kl_addr, kl_size); - return(LOAD_IOERROR); - } + if (!data->cmds) { + addr = kalloc(size); + if (!addr) { + printf("kalloc for commands failed\n"); + return LOAD_NOSPACE; + } + + if (data->vp) { + int resid = 0; + error = vn_rdwr(UIO_READ, data->vp, addr, size, file_offset, + + UIO_SYSSPACE32, 0, kauth_cred_get(), &resid, p); + if (error || (resid != 0)) { + printf("vn_rdwr for commands failed\n"); + if (addr) + kfree(addr, size); + return LOAD_IOERROR; + } + } else if (data->custom) { + ret = read_from_custom_binary(data->custom, file_offset, size, addr); + if (ret != KERN_SUCCESS) { + printf("read_from_custom_binary failed\n"); + if (addr) + kfree(addr, size); + return LOAD_FAILURE; + } + } + + data->cmds = addr; + data->cmds_size = size; + } else + addr = data->cmds; + + ASSERT(addr); /* * Scan through the commands, processing each one as necessary. */ - for (pass = 1; pass <= 2; pass++) { + for (pass = 0; pass <= 2; pass++) { /* * Loop through each of the load_commands indicated by the * Mach-O header; if an absurd value is provided, we just @@ -563,18 +770,68 @@ * intervention is required. 
*/ switch(lcp->cmd) { + + struct segment_command_64 *scp64; + struct segment_command *scp; + struct linkedit_data_command *ldcp; + boolean_t should_patch; + case LC_SEGMENT: + if (pass != 1) + break; + scp = (struct segment_command *) lcp; + should_patch = force_patch || apple_cs; + if (IsIntelCPU() && !PE_parse_boot_argn("-amd", &boot_arg, sizeof(boot_arg))) + { + ret = load_segment(lcp, + header->filetype, + control, + file_offset, + macho_size, + data->vp, + map, + result); + } else { + if (verbose && !should_patch && scp->segname && + !strcmp(scp->segname, "__TEXT")) + printf("no apple code signature, not patching binary\n"); + ret = load_segment_patched(data, file_offset, macho_size, map, result, abi64, + FALSE, (uint64_t) scp->vmaddr, (uint64_t) scp->vmsize, + scp->fileoff, scp->filesize, scp->maxprot, scp->initprot, + scp->flags, scp->segname, !should_patch, FALSE, NULL); + if ((ret == LOAD_SUCCESS) && (header->filetype == MH_DYLINKER) && (result->all_image_info_addr == MACH_VM_MIN_ADDRESS)) + note_all_image_info_section((const struct segment_command_64 *)scp, FALSE, sizeof(struct section), + (const char *)lcp + sizeof(struct segment_command), result); + + } + break; case LC_SEGMENT_64: if (pass != 1) break; - ret = load_segment(lcp, - header->filetype, - control, - file_offset, - macho_size, - vp, - map, - result); + scp64 = (struct segment_command_64 *) lcp; + should_patch = force_patch || apple_cs; + if (IsIntelCPU() && !PE_parse_boot_argn("-amd", &boot_arg, sizeof(boot_arg))) + { + ret = load_segment(lcp, + header->filetype, + control, + file_offset, + macho_size, + data->vp, + map, + result); + } else { + if (verbose && !should_patch && scp64->segname && + !strcmp(scp64->segname, "__TEXT")) + printf("no apple code signature, not patching binary\n"); + ret = load_segment_patched(data, file_offset, macho_size, map, result, abi64, + TRUE, scp64->vmaddr, scp64->vmsize, scp64->fileoff, + scp64->filesize, scp64->maxprot, scp64->initprot, + scp64->flags, 
scp64->segname, !should_patch, FALSE, NULL); + if ((ret == LOAD_SUCCESS) && (header->filetype == MH_DYLINKER) && (result->all_image_info_addr == MACH_VM_MIN_ADDRESS)) + note_all_image_info_section((const struct segment_command_64 *)scp64, TRUE, sizeof(struct section_64), + (const char *)lcp + sizeof(struct segment_command_64), result); + } break; case LC_THREAD: if (pass != 2) @@ -609,15 +866,44 @@ break; case LC_CODE_SIGNATURE: /* CODE SIGNING */ - if (pass != 2) + if (pass != 0) + break; + ldcp = (struct linkedit_data_command *) lcp; + /* todo: implement patcher_opts option to control cs ident checking */ + if (data->vp && patcher_enabled) { + char ident[50]; + static const char apple_ident[] = "com.apple."; + static const size_t apple_ident_len = sizeof apple_ident - 1; + ret = get_cs_ident(data->vp, file_offset, macho_size, + ldcp->dataoff, ldcp->datasize, ident, 50); + if (ret != KERN_SUCCESS) { + printf("warning: unable to obtain identifier string " + "from blob\n"); + ret = LOAD_SUCCESS; /* ignore error */ + break; + } + if (!strncmp(ident, apple_ident, apple_ident_len)) { + /* since translate seems to be the only known apple + * binary which does not function with cpuid patches, + * check for it here. */ + apple_cs = TRUE; + } + if (verbose) + printf("cs identifier: %s (apple_cs: %s)\n", ident, + (apple_cs ? "true" : "false")); + } + /* todo: load code signatures if no patches were required (this will + * probably require another pass). 
*/ + cs_enabled = (cs_enabled && !apple_cs); + if (!cs_enabled) break; /* pager -> uip -> load signatures & store in uip set VM object "signed_pages" */ ret = load_code_signature( - (struct linkedit_data_command *) lcp, - vp, + ldcp, + data->vp, file_offset, macho_size, header->cputype, @@ -625,7 +911,7 @@ if (ret != LOAD_SUCCESS) { printf("proc %d: load code signature error %d " "for file \"%s\"\n", - p->p_pid, ret, vp->v_name); + p->p_pid, ret, data->vp->v_name); ret = LOAD_SUCCESS; /* ignore error */ } else { got_code_signatures = TRUE; @@ -637,11 +923,11 @@ break; ret = set_code_unprotect( (struct encryption_info_command *) lcp, - addr, map, vp); + addr, map, data->vp); if (ret != LOAD_SUCCESS) { printf("proc %d: set_code_unprotect() error %d " "for file \"%s\"\n", - p->p_pid, ret, vp->v_name); + p->p_pid, ret, data->vp->v_name); /* Don't let the app run if it's * encrypted but we failed to set up the * decrypter */ @@ -661,18 +947,21 @@ break; } if (ret == LOAD_SUCCESS) { - if (! got_code_signatures) { + if (!cs_enabled) + result->csflags = CS_VALID; + else if (!got_code_signatures) { struct cs_blob *blob; /* no embedded signatures: look for detached ones */ - blob = ubc_cs_blob_get(vp, -1, file_offset); + blob = ubc_cs_blob_get(data->vp, -1, file_offset); if (blob != NULL) { /* get flags to be applied to the process */ result->csflags |= blob->csb_flags; } } - if (dlp != 0) - ret = load_dylinker(dlp, dlarchbits, map, thread, depth, result, abi64); + if (dlp != 0) { + ret = load_dylinker(dlp, dlarchbits, map, thread, depth, result, abi64, needs_stock_dyld); + } if(depth == 1) { if (result->thread_count == 0) { @@ -693,8 +982,8 @@ } } - if (kl_addr ) - kfree(kl_addr, kl_size); + if (data->vp && (ret == LOAD_SUCCESS)) + ubc_map(data->vp, PROT_READ | PROT_EXEC); return(ret); } @@ -703,6 +992,45 @@ #define APPLE_UNPROTECTED_HEADER_SIZE (3 * PAGE_SIZE_64) +static int unprotect_patched_segment(uint8_t *data, size_t offset, size_t size) +{ + if (offset & PAGE_MASK) { 
+ printf("error: offset not page-aligned\n"); + return LOAD_FAILURE; + } else if (size & PAGE_MASK) { + printf("error: size not page-aligned\n"); + return LOAD_FAILURE; + } + + if (offset <= APPLE_UNPROTECTED_HEADER_SIZE) { + if ((offset + size) <= APPLE_UNPROTECTED_HEADER_SIZE) + size = 0; /* whole range ends at or before the header limit: nothing to transform */ + else { + size_t delta = APPLE_UNPROTECTED_HEADER_SIZE - offset; + offset += delta; /* start at APPLE_UNPROTECTED_HEADER_SIZE */ + size -= delta; + } + } + + if (!size) + return LOAD_SUCCESS; + + data += offset; + do { + /* mercurysquad: updated to transform the page in-place using dsmos hook */ + /* AnV - FIXME: decryption is not always successful on the first pass (no problem as it will try again) + * Modifications done for Snow Leopard + */ + if (dsmos_page_transform(data, data, 0, (void *)0x2e69cf40) != KERN_SUCCESS) + return LOAD_FAILURE; + data += PAGE_SIZE; + } while ((size -= PAGE_SIZE) >= PAGE_SIZE); + + ASSERT(!size); /* should be guaranteed by offset/size page alignment checks */ + + return LOAD_SUCCESS; +} + static load_return_t unprotect_segment( uint64_t file_off, @@ -770,6 +1098,384 @@ } #endif /* CONFIG_CODE_DECRYPTION */ +static kern_return_t patch_exec_obj_segment( + vm_object_t exec_obj, + mach_vm_offset_t map_offset, + mach_vm_size_t map_size, + mach_vm_offset_t map_addr, + boolean_t abi_is_64, + boolean_t seg_is_64, + uint32_t fileoff, + uint32_t filesize, + boolean_t is_encrypted, + boolean_t verbose, + boolean_t dont_patch, + uint32_t *num_patches_out, + boolean_t *bypass) +{ + kern_return_t ret; + boolean_t success = FALSE; + vm_prot_t new_prot = VM_PROT_READ | VM_PROT_WRITE; + vm_map_offset_t /* uint64_t */ tmp_addr = 0; + vm_address_t /* uint32_t */ new_addr = 0; + uint32_t num_patches, num_bad; + + /* todo: we should be able to assert that the map offset and size are page-aligned. + * if this is not the case, kernel map entries will not be correctly deallocated + * upon vm_map_remove. don't assert this for now to be on the safe side.
*/ + + exec_obj_reference(exec_obj); + ret = vm_map_enter(kernel_map, &tmp_addr, map_size, PAGE_MASK, VM_FLAGS_ANYWHERE, + exec_obj, map_offset, TRUE, new_prot, new_prot, VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) { + printf("vm_map_enter failed\n"); + exec_obj_deallocate(exec_obj); + goto cleanup; + } + new_addr = tmp_addr; + + ret = (kern_return_t) vm_map_check_protection(kernel_map, new_addr, + new_addr + map_size, new_prot); + if (!ret) { + printf("vm_map_check_protection failed\n"); + goto cleanup; + } + + if (is_encrypted) { + ret = (kern_return_t) unprotect_patched_segment((uint8_t *)new_addr, fileoff, filesize); + if (ret != KERN_SUCCESS) + { + printf("Decryption hasn't succeeded yet (probably will retry)\n"); + goto cleanup; + } + } + if (!dont_patch) { + ret = patch_text_segment((uint8_t *) new_addr, map_addr, map_size, abi_is_64, + seg_is_64, verbose, bypass, &num_patches, &num_bad); + if (ret != KERN_SUCCESS) { + printf("patch_text_segment failed\n"); + goto cleanup; + } + } else { + *bypass = FALSE; + num_patches = 0; + num_bad = 0; + } + + *num_patches_out = num_patches; + + success = TRUE; + cleanup: +#ifdef EXTRA_VERBOSE + if (verbose) + dump_vm_object(exec_obj, success ? "peos success" : "peos cleanup", + DUMP_VM_OBJ_REFCOUNT); +#endif + if (new_addr) { + /* vm_map_remove drops a reference on the object if successful */ + ret = vm_map_remove(kernel_map, new_addr, new_addr + map_size, VM_MAP_NO_FLAGS); + if (ret != KERN_SUCCESS) + printf("vm_map_remove failed\n"); + } + + return (success ? 
KERN_SUCCESS : KERN_FAILURE); +} + +static boolean_t patches_exist_for_vnode(vnode_t vp) +{ + boolean_t patches_exist = FALSE; + patcher_arch_t arch; + + for (arch = 0; !patches_exist && (arch < PATCHER_ARCH_MAX); arch++) + if (vp->v_ubcinfo->num_patches[arch] > 0) + patches_exist = TRUE; + + return patches_exist; +} + +static const char *patcher_arch_to_string(patcher_arch_t arch) +{ + const char *arch_string; + + switch (arch) { + case PATCHER_ARCH_I386: + arch_string = "i386"; + break; + case PATCHER_ARCH_X86_64: + arch_string = "x86_64"; + break; + default: + arch_string = "unknown"; + break; + } + + return arch_string; +} + +kern_return_t map_segment( + vnode_t vp, + vm_map_t map, + mach_vm_offset_t map_offset, + mach_vm_size_t map_size, + mach_vm_offset_t map_addr, + boolean_t seg_is_text, + boolean_t abi_is_64, + boolean_t seg_is_64, + uint32_t fileoff, + uint32_t filesize, + vm_prot_t maxprot, + vm_prot_t initprot, + boolean_t is_encrypted, + boolean_t verbose, + boolean_t dont_patch) +{ + kern_return_t ret; + memory_object_t pager; + int map_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE; + patcher_arch_t arch = abi_is_64 ? PATCHER_ARCH_X86_64 : PATCHER_ARCH_I386; + const char *arch_string = patcher_arch_to_string(arch); + + pager = ubc_getpager(vp); + if (!pager) { + printf("ubc_getpager failed\n"); + return KERN_FAILURE; + } + + /* we take this path in two cases: (1) the segment contains code (ie. the text segment) + * and patching has been enabled either forcefully or automatically. each unique binary is + * scanned only once; the same exec_obj is mapped for repeated loads of the segment. if no + * patches were needed (num_patches was set to -1) the segment is mapped normally. (2) with + * encrypted binaries, for which the apple protect pager is no longer used. 
*/ + + /* todo: lock vnode while we are working with ubcinfo */ + + if (seg_is_text && ((KERN_patcherOpts & OPT_PATCHER_ALL) || is_encrypted) && + (vp->v_ubcinfo->num_patches[arch] >= 0)) { + vm_object_t exec_obj; + int32_t num_patches; + + if (!vp->v_ubcinfo->num_patches[arch]) { + uint32_t tmp_num_patches; + boolean_t bypass = FALSE; + boolean_t patches_exist; + + if (verbose) + printf("scanning binary (num_patches[%s] == 0)\n", arch_string); + + patches_exist = patches_exist_for_vnode(vp); + exec_obj = vp->v_ubcinfo->exec_obj; + if (!exec_obj) { + ASSERT(!patches_exist); + exec_obj = exec_obj_create(pager, map_offset, map_size, verbose); + if (!exec_obj) { + printf("exec_obj_create returned null object\n"); + return KERN_FAILURE; + } + } else + ASSERT(patches_exist); + + /* make the optimizer happy, we don't rely on this value being sane */ + tmp_num_patches = 0; + + /* if we only got here because the binary is encrypted, don't attempt to + * patch anything */ + if (!(KERN_patcherOpts & OPT_PATCHER_ALL)) + dont_patch = TRUE; + + ret = patch_exec_obj_segment(exec_obj, map_offset, map_size, map_addr, + abi_is_64, seg_is_64, fileoff, filesize, is_encrypted, + verbose, dont_patch, &tmp_num_patches, &bypass); + if (bypass) { + /* some binaries (ie. packed with upx or similar tools) don't have + * a text section, so we continue loading normally in such cases. + * we can also get here if the patcher determined that the text + * section does not begin with valid code, and as such probably + * is packed with some other tool. 
*/ + exec_obj = NULL; + num_patches = -1; + } else if (ret != KERN_SUCCESS) { + printf("patch_exec_obj_segment failed\n"); + exec_obj_deallocate(exec_obj); + return KERN_FAILURE; + } else + num_patches = (int32_t) tmp_num_patches; + + /* if the binary needed decryption, then preserve the new object + * regardless of whether any patches were needed */ + if (is_encrypted && !num_patches) + num_patches = 1; + + if (!num_patches) { + exec_obj_deallocate(exec_obj); + vp->v_ubcinfo->num_patches[arch] = -1; + } else { + if (!vp->v_ubcinfo->exec_obj) + vp->v_ubcinfo->exec_obj = exec_obj; + vp->v_ubcinfo->num_patches[arch] = num_patches; + } + } else { + if (verbose) + printf("using cached exec_obj (num_patches[%s] > 0)\n", arch_string); + + exec_obj = vp->v_ubcinfo->exec_obj; + num_patches = vp->v_ubcinfo->num_patches[arch]; + } + + if (num_patches > 0) { + if (verbose) + printf("mapping exec_obj %p (num_patches[%s]: %d)\n", exec_obj, + arch_string, num_patches); + + exec_obj_reference(exec_obj); + ret = vm_map_enter(map, &map_addr, map_size, 0, map_flags, exec_obj, + map_offset, TRUE, initprot, maxprot, VM_INHERIT_DEFAULT); + if (ret != KERN_SUCCESS) { + printf("vm_map_enter into target task failed\n"); + exec_obj_deallocate(exec_obj); + return KERN_FAILURE; + } +#ifdef EXTRA_VERBOSE + if (verbose) + dump_vm_object(exec_obj, "ms map", DUMP_VM_OBJ_REFCOUNT); +#endif + return KERN_SUCCESS; + } + + /* fall through here if no patches were needed (this won't be reached again + * unless the binary changes the next time around). 
*/ + } + + if (seg_is_text && verbose) + printf("mapping normally (num_patches[%s] == -1)\n", arch_string); + + ret = vm_map_enter_mem_object(map, &map_addr, map_size, 0, map_flags, (ipc_port_t) pager, + map_offset, TRUE, initprot, maxprot, VM_INHERIT_DEFAULT); + if (ret != KERN_SUCCESS) { + printf("vm_map_enter_mem_object failed\n"); + return KERN_FAILURE; + } + + return KERN_SUCCESS; +} + +kern_return_t patch_custom_binary( + custom_binary_t custom, + integer_t cputype, + boolean_t verbose) +{ + kern_return_t ret; + kern_return_t result = KERN_FAILURE; /* value returned to the caller; only set to KERN_SUCCESS at the end */ + vm_offset_t map_addr = 0, new_addr; + vm_prot_t prot = VM_PROT_READ | VM_PROT_WRITE; + struct fat_header *fat_header; + uint32_t macho_offset; + struct mach_header *mach_header; + boolean_t abi_is_64, seg_is_64; + uint32_t seg_offset; + uint8_t *seg_addr; + vm_size_t seg_max_size; + boolean_t bypass; + uint32_t num_patches, num_bad; + vm_map_size_t pageoff; + vm_offset_t new_offset; + vm_size_t new_size; + + check_custom_binary(custom); + + pageoff = custom->offset & PAGE_MASK; + new_offset = custom->offset - pageoff; + new_size = vm_map_round_page(custom->size + pageoff); + + ret = vm_map(kernel_map, &map_addr, new_size, PAGE_MASK, VM_FLAGS_ANYWHERE, custom->object, + new_offset, FALSE, prot, prot, VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) { + printf("vm_map failed\n"); + goto error; + } + new_addr = map_addr + pageoff; + + fat_header = (struct fat_header *) new_addr; + if (fat_header->magic == MH_MAGIC || fat_header->magic == MH_MAGIC_64) { + macho_offset = 0; + mach_header = (struct mach_header *) new_addr; + } else if (fat_header->magic == FAT_MAGIC || fat_header->magic == FAT_CIGAM) { + struct fat_arch fat_arch; + + ret = fatfile_getarch2(NULL, (vm_offset_t) fat_header, cputype, CPU_SUBTYPE_LIB64, + &fat_arch); + if (ret != LOAD_SUCCESS) { + printf("fatfile_getarch2 failed\n"); + goto error; + } + + if (((uint64_t) fat_arch.offset + fat_arch.size) > custom->size) { /* 64-bit sum: the 32-bit fields could wrap and slip past the check */ + printf("arch offset/size not in mapped range\n"); +
goto error; + } + macho_offset = fat_arch.offset; + mach_header = (struct mach_header *) (new_addr + macho_offset); + if ((mach_header->magic != MH_MAGIC) && (mach_header->magic != MH_MAGIC_64)) { + printf("invalid mach-o file at arch offset\n"); + goto error; + } + } else { + printf("invalid mach-o file\n"); + goto error; + } + + /* note: mach_header and mach_header_64 are the same except that the latter has an + * extra 32-bit reserved field. */ + + if (mach_header->cputype != cputype) { + printf("couldn't find target arch\n"); + goto error; + } + + abi_is_64 = (mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64; + seg_is_64 = (mach_header->magic == MH_MAGIC_64) || (mach_header->magic == MH_CIGAM_64); + + if (seg_is_64) { + struct segment_command_64 *scp64 = getsegforpatch_64((struct mach_header_64 *)mach_header, "__TEXT"); + if (!scp64) { + printf("getsegforpatch_64 failed\n"); + goto error; + } else if ((scp64->fileoff > custom->size) || (scp64->filesize > custom->size) || ((macho_offset + scp64->fileoff + scp64->filesize) > custom->size)) { /* bound each term first so the sum cannot wrap */ + printf("segment offset/size not in mapped range\n"); + goto error; + } + seg_offset = scp64->fileoff; + } else { + struct segment_command *scp = getsegforpatch(mach_header, "__TEXT"); + if (!scp) { + printf("getsegforpatch failed\n"); + goto error; + } else if (((uint64_t) macho_offset + scp->fileoff + scp->filesize) > custom->size) { /* 64-bit sum: three 32-bit terms could wrap */ + printf("segment offset/size not in mapped range\n"); + goto error; + } + seg_offset = scp->fileoff; + } + + seg_addr = (uint8_t *) (new_addr + macho_offset + seg_offset); + seg_max_size = custom->size - macho_offset - seg_offset; /* cannot underflow: range validated above */ + ret = patch_text_segment(seg_addr, /* unused */ 0, seg_max_size, abi_is_64, seg_is_64, + verbose, &bypass, &num_patches, &num_bad); + if (!bypass && (ret != KERN_SUCCESS)) { + printf("patch_text_segment failed\n"); + goto error; + } + + result = KERN_SUCCESS; + +error: + if (map_addr) { + ret = vm_map_remove(kernel_map, map_addr, map_addr + new_size, VM_MAP_NO_FLAGS); + if (ret != KERN_SUCCESS) + printf("warning: vm_map_remove
failed\n"); + } + return result; +} + static load_return_t load_segment( @@ -1003,6 +1709,183 @@ return(LOAD_SUCCESS); } +static void check_custom_binary( + custom_binary_t custom +) +{ + ASSERT(custom); + ASSERT(custom->object); + ASSERT(custom->size); +} + +__unused static void free_custom_binary( + custom_binary_t custom +) +{ + check_custom_binary(custom); + + ipc_port_release(custom->object); + custom->object = NULL; + + custom->offset = 0; + custom->size = 0; + + /* unlike with macho_data, custom_binary instances are dynamically allocated */ + kfree(custom, sizeof *custom); +} + +static kern_return_t create_custom_binary( + const void *data, + vm_size_t size, + custom_binary_t *custom +) +{ + kern_return_t ret; + boolean_t result = KERN_FAILURE; + vm_offset_t addr = 0; + memory_object_size_t tmp_size; + vm_size_t new_size; + custom_binary_t new_custom = NULL; + ipc_port_t object = NULL; + + new_size = vm_map_round_page(size); + + ret = vm_map(kernel_map, &addr, new_size, PAGE_MASK, VM_FLAGS_ANYWHERE, NULL, 0, + FALSE, VM_PROT_ALL, VM_PROT_ALL, VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) { + printf("vm_map failed\n"); + goto error; + } + + memcpy((void *) addr, data, size); + + tmp_size = new_size; + ret = mach_make_memory_entry_64(kernel_map, &tmp_size, addr, VM_PROT_ALL, + &object, NULL); + if ((ret != KERN_SUCCESS) || !object) { + printf("mach_make_memory_entry_64 failed\n"); + goto error; + } + + new_custom = (custom_binary_t) kalloc(sizeof *new_custom); + if (!new_custom) { + printf("kalloc for custom binary failed\n"); + goto error; + } + new_custom->object = object; + new_custom->offset = 0; + new_custom->size = size; + check_custom_binary(new_custom); + + *custom = new_custom; + + result = KERN_SUCCESS; + +error: + if (result != KERN_SUCCESS) { + if (object) + ipc_port_release(object); + if (new_custom) { + /* if the object existed then it has already been freed */ + bzero(new_custom, sizeof *new_custom); /* just in case */ + kfree(new_custom, sizeof 
*new_custom); + } + } + + if (addr) { + ret = vm_map_remove(kernel_map, addr, addr + new_size, VM_MAP_NO_FLAGS); + if (ret != KERN_SUCCESS) + printf("warning: vm_map_remove failed\n"); + } + + return result; +} + +static kern_return_t read_from_custom_binary( + custom_binary_t custom, + vm_offset_t offset, + vm_size_t size, + void *dest +) +{ + kern_return_t ret; + kern_return_t result = KERN_FAILURE; /* value returned to the caller; only set to KERN_SUCCESS after the copy */ + vm_offset_t map_addr = 0; + vm_map_size_t pageoff; + vm_offset_t new_offset; + vm_size_t new_size = 0; + + check_custom_binary(custom); + + if (!size) { + printf("error: requested size is zero\n"); + goto error; + } else if ((offset > custom->size) || (size > (custom->size - offset))) { /* overflow-safe form of offset + size > custom->size */ + printf("error: requested offset and size exceed custom binary size\n"); + goto error; + } + + pageoff = offset & PAGE_MASK; + new_offset = offset - pageoff; + new_size = vm_map_round_page(size + pageoff); + + ret = vm_map(kernel_map, &map_addr, new_size, PAGE_MASK, VM_FLAGS_ANYWHERE, custom->object, + new_offset, FALSE, VM_PROT_READ, VM_PROT_READ, VM_INHERIT_NONE); + if (ret != KERN_SUCCESS) { + printf("vm_map failed\n"); + goto error; + } + + memcpy(dest, ((uint8_t *) map_addr) + pageoff, size); + + result = KERN_SUCCESS; + +error: + if (map_addr) { + ret = vm_map_remove(kernel_map, map_addr, map_addr + new_size, VM_MAP_NO_FLAGS); + if (ret != KERN_SUCCESS) + printf("warning: vm_map_remove failed\n"); + } + + return result; +} + +static void check_macho_data( + macho_data_t data +) +{ + ASSERT(data); + ASSERT(!(data->vp && data->custom)); + ASSERT(data->vp || data->custom); + if (data->cmds) { + ASSERT(data->cmds_size); + ASSERT(!(data->cmds_size & PAGE_MASK)); + } else + ASSERT(!data->cmds_size); + ASSERT(data->name); +} + +static void free_macho_data( + macho_data_t data +) +{ + check_macho_data(data); + + /* the custom binary or vnode instance is considered external to the macho_data + * "instance" (not dynamically allocated for performance reasons).
in the future + * perhaps macho_data instances can be cached in the corresponding ubc node. */ + + data->vp = NULL; + data->custom = NULL; + data->name = NULL; + + if (data->cmds) { + kfree(data->cmds, data->cmds_size); + data->cmds = NULL; + data->cmds_size = 0; + } +} + static load_return_t load_unixthread( @@ -1183,6 +2066,297 @@ return(LOAD_SUCCESS); } +int map_text_segment_sys(struct proc * proc, struct map_text_segment_sys_args * args, user_addr_t * retval) +{ + int ret, error = 0; + kauth_cred_t kern_cred; + struct fileproc *fp = NULL; + vnode_t vp = NULL; + boolean_t has_vnode_ref = FALSE; + boolean_t seg_is_64; + boolean_t abi_is_64; + struct mach_header header; + size_t cmds_size, hdr_size; + void *cmds = NULL; + int resid; + struct macho_data data; + boolean_t should_free_data = FALSE; + vm_map_t user_map; + load_result_t load_result; + mach_vm_offset_t map_addr; + +#ifdef EXTRA_VERBOSE + printf("fd: %d, arch_offset: 0x%x, arch_size: 0x%x, map_addr: 0x%llx, mode: %d\n", + args->fd, args->arch_offset, args->arch_size, args->map_addr, args->mode); +#endif + + if (!args->arch_size) { + printf("arch_size is zero\n"); + error = EINVAL; + goto out; + } + + switch (args->mode) { + case MTS_MODE_DEFAULT: + map_addr = -1ULL; + break; + case MTS_MODE_FIXED: + map_addr = args->map_addr; + break; + case MTS_MODE_ANYWHERE: + /* not yet implemented */ + default: + printf("unknown mode\n"); + error = EINVAL; + goto out; + } + + kern_cred = kauth_cred_get(); + if (!kern_cred) { + printf("kauth_cred_get failed\n"); + error = EINVAL; + goto out; + } + + ret = fp_lookup(proc, args->fd, &fp, 0); + if (ret != 0) { + printf("fp_lookup failed\n"); + error = ret; + goto out; + } + + if (fp->f_fglob->fg_type != DTYPE_VNODE) { + printf("fd does not represent vnode\n"); + error = EINVAL; + goto out; + } + vp = (vnode_t) fp->f_fglob->fg_data; + + ret = vnode_getwithref(vp); + if (ret != 0) { + printf("vnode_getwithref failed\n"); + error = EINVAL; + goto out; + } + has_vnode_ref = 
TRUE; + + if (KERN_patcherOpts & OPT_PATCHER_DEBUG) + printf("mts: mapping %s (offset: 0x%x, size: 0x%x, addr: 0x%llx)\n", vp->v_name, + args->arch_offset, args->arch_size, args->map_addr); + + if (vp->v_type != VREG) { + printf("vnode type is not regular file\n"); + error = EINVAL; + goto out; + } + + resid = 0; + ret = vn_rdwr(UIO_READ, vp, (caddr_t) &header, sizeof (header), args->arch_offset, + UIO_SYSSPACE32, 0, kern_cred, &resid, proc); + if ((ret != 0) || (resid != 0)) { + printf("vn_rdwr for header failed\n"); + error = EIO; + goto out; + } + + abi_is_64 = (header.cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64; + + if ((header.magic == MH_MAGIC_64) || (header.magic == MH_CIGAM_64)) { + seg_is_64 = TRUE; + hdr_size = sizeof (struct mach_header_64); + } else if ((header.magic == MH_MAGIC) || (header.magic == MH_CIGAM)) { + seg_is_64 = FALSE; + hdr_size = sizeof (struct mach_header); + } else { + printf("file is not valid mach-o\n"); + error = EBADMACHO; + goto out; + } + + cmds_size = round_page(hdr_size + header.sizeofcmds); + cmds = kalloc(cmds_size); /* freed by free_macho_data */ + if (!cmds) { + printf("kalloc failed\n"); + error = ENOMEM; + goto out; + } + + resid = 0; + ret = vn_rdwr(UIO_READ, vp, cmds, cmds_size, args->arch_offset, UIO_SYSSPACE32, 0, + kern_cred, &resid, proc); + if ((ret != 0) || (resid != 0)) { + printf("vn_rdwr for commands failed\n"); + error = EIO; + goto out; + } + + data.vp = vp; + data.custom = NULL; + data.name = vp->v_name; + data.cmds = cmds; + data.cmds_size = cmds_size; + + should_free_data = TRUE; + + user_map = current_map(); + + if (seg_is_64) { + struct segment_command_64 *scp64 = getsegforpatch_64((struct mach_header_64 *)cmds, "__TEXT"); + if (!scp64) { + printf("getsegforpatch_64 failed\n"); + error = EBADMACHO; + goto out; + } + ret = load_segment_patched(&data, args->arch_offset, args->arch_size, user_map, + &load_result, abi_is_64, seg_is_64, scp64->vmaddr, scp64->vmsize, + scp64->fileoff, scp64->filesize, 
scp64->maxprot, scp64->initprot, + scp64->flags, scp64->segname, FALSE, FALSE, &map_addr); + } else { + struct segment_command *scp = getsegforpatch((struct mach_header *)cmds, "__TEXT"); + if (!scp) { + printf("getsegforpatch failed\n"); + error = EBADMACHO; + goto out; + } + ret = load_segment_patched(&data, args->arch_offset, args->arch_size, user_map, + &load_result, abi_is_64, seg_is_64, scp->vmaddr, scp->vmsize, + scp->fileoff, scp->filesize, scp->maxprot, scp->initprot, + scp->flags, scp->segname, FALSE, FALSE, &map_addr); + } + + if (ret != LOAD_SUCCESS) { + printf("load_segment failed\n"); + error = EBADMACHO; + goto out; + } + +out: + if (should_free_data) + free_macho_data(&data); + if (has_vnode_ref) + vnode_put(vp); + if (fp) + fp_drop(proc, args->fd, fp, 0); + + if (error) + map_addr = -1ULL; + *retval = map_addr; + + return error; +} + +static load_return_t load_segment_patched( + macho_data_t data, + off_t /* int64_t */ pager_offset, + off_t /* int64_t */ macho_size, + vm_map_t map, + load_result_t *result, + boolean_t abi_is_64, + boolean_t seg_is_64, + uint64_t vmaddr, + uint64_t vmsize, + uint32_t fileoff, + uint32_t filesize, + vm_prot_t maxprot, + vm_prot_t initprot, + uint32_t flags, + char *segname, + boolean_t dont_patch, + __unused boolean_t anywhere, + mach_vm_offset_t *user_addr) +{ + kern_return_t ret = KERN_SUCCESS; + mach_vm_offset_t map_addr, map_offset; + mach_vm_size_t map_size, seg_size, delta_size, total_size; + boolean_t seg_is_text; + boolean_t verbose; + boolean_t is_encrypted; + + check_macho_data(data); + + seg_is_text = (segname && !strcmp(segname, "__TEXT")); + verbose = (KERN_patcherOpts & OPT_PATCHER_DEBUG) != 0; + is_encrypted = (flags & SG_PROTECTED_VERSION_1) != 0; + + maxprot &= VM_PROT_ALL; + initprot &= VM_PROT_ALL; + + if (((uint64_t) fileoff + filesize) > (uint64_t) macho_size) { /* widen first: both operands are 32-bit and their sum could wrap past the check */ + printf("error: file offset and size exceed arch size\n"); + return LOAD_BADMACHO; + } + + if (fileoff & PAGE_MASK_64) { + printf("error: file
offset not page-aligned\n"); + return LOAD_BADMACHO; + } + + map_size = round_page_64(filesize); /* limited to 32 bits */ + + if (user_addr && (*user_addr != -1ULL)) + map_addr = *user_addr; + else + map_addr = round_page_64(vmaddr); + + seg_size = round_page_64(vmsize); + if (!seg_size) + goto success; + + map_offset = pager_offset + fileoff; /* limited to 32 bits */ + delta_size = seg_size - map_size; + total_size = map_size + delta_size; + + if (!fileoff && filesize) + result->mach_header = map_addr; + + /* If the virtual size of the segment is greater than the size from the file, we need + * to allocate zero fill memory for the rest. */ + if (delta_size > 0) { + mach_vm_offset_t tmp = map_addr + map_size; + ret = mach_vm_map(map, &tmp, delta_size, 0, VM_FLAGS_FIXED, NULL, + 0, FALSE, initprot, maxprot, VM_INHERIT_DEFAULT); + if (ret != KERN_SUCCESS) { + printf("mach_vm_map failed\n"); + return LOAD_NOSPACE; + } + } + + if (!map_size) + goto success; + + if (data->vp) { + if (seg_is_text && verbose) + printf("map_size: %llx, map_offset: %llx, vmsize: %llx, vmaddr: %llx\n", + + map_size, map_offset, vmsize, vmaddr); + ret = map_segment(data->vp, map, map_offset, map_size, map_addr, seg_is_text, + abi_is_64, seg_is_64, fileoff, filesize, maxprot, initprot, + is_encrypted, verbose, dont_patch); + if (ret != KERN_SUCCESS) { + printf("map_segment failed\n"); + return LOAD_FAILURE; + } + } else { + check_custom_binary(data->custom); + ret = mach_vm_map(map, &map_addr, map_size, 0, VM_FLAGS_FIXED, data->custom->object, + map_offset, TRUE, initprot, maxprot, VM_INHERIT_DEFAULT); + if (ret != KERN_SUCCESS) { + printf("mach_vm_map failed\n"); + return LOAD_NOSPACE; + } + } + +success: + if (user_addr) { + if ((*user_addr != -1ULL) && (map_addr != *user_addr)) + printf("warning: mapped address not requested address\n"); + *user_addr = map_addr; + } + + if (seg_is_text && verbose) + printf("map_addr: 0x%llx\n", map_addr); + + return LOAD_SUCCESS; +} static load_return_t @@ 
-1193,7 +2367,8 @@ thread_t thread, int depth, load_result_t *result, - boolean_t is_64bit + boolean_t is_64bit, + boolean_t force_std ) { char *name; @@ -1208,6 +2383,15 @@ vm_map_copy_t tmp; mach_vm_offset_t dyl_start, map_addr; mach_vm_size_t dyl_length; + struct macho_data data; + boolean_t should_free_data = FALSE; + uint32_t boot_arg; + static custom_binary_t dyld_custom = NULL; + static int32_t use_new_dyld = -1; + boolean_t verbose, patcher_enabled; + + verbose = (KERN_patcherOpts & OPT_PATCHER_DEBUG) != 0; + patcher_enabled = (KERN_patcherOpts & OPT_PATCHER_ALL) != 0; if (lcp->cmdsize < sizeof(*lcp)) return (LOAD_BADMACHO); @@ -1222,10 +2406,83 @@ return(LOAD_BADMACHO); } while (*p++); - ret = get_macho_vnode(name, archbits, &header, &file_offset, &macho_size, &vp); - if (ret) - return (ret); - + if (use_new_dyld == -1) { + if (!dyld_size) { + use_new_dyld = 0; + printf("warning: kernel was not built with custom dyld blob\n"); + } else if ((PE_parse_boot_argn("std_dyld", &boot_arg, sizeof(boot_arg)) && boot_arg) || (IsIntelCPU() && !PE_parse_boot_argn("-amd", &boot_arg, sizeof(boot_arg)))) + use_new_dyld = 0; + else { + use_new_dyld = 1; +#if 0 + if (KERN_patcherOpts & OPT_PATCHER_SYSENTER) { + if (verbose) + printf("patching sysenter_trap in custom dyld\n"); + ASSERT((dyld_size - dyld_sysenter_patch) >= 8); + patch_sysenter_trap(dyld_binary + dyld_sysenter_patch); + } +#endif + ASSERT(!dyld_custom); + ret = create_custom_binary(dyld_binary, dyld_size, &dyld_custom); + if ((ret != KERN_SUCCESS) || !dyld_custom) { + printf("create_custom_binary failed\n"); + return LOAD_FAILURE; + } + if (patcher_enabled) { + if (verbose) + printf("patching custom dyld (i386)\n"); + ret = patch_custom_binary(dyld_custom, CPU_TYPE_X86, verbose); + if (ret != KERN_SUCCESS) { + printf("patch_custom_binary for dyld (i386) failed\n"); + return LOAD_FAILURE; + } + if (ml_is64bit()) { + if (verbose) + printf("patching custom dyld (x86_64)\n"); + ret = 
patch_custom_binary(dyld_custom, CPU_TYPE_X86_64, verbose); + if (ret != KERN_SUCCESS) { + printf("patch_custom_binary for dyld (x86_64) failed\n"); + return LOAD_FAILURE; + } + } else if (verbose) + printf("system not 64-bit capable, won't patch x86_64 dyld\n"); + } + } + + if (!use_new_dyld && patcher_enabled) + printf("warning: using standard dyld with patcher enabled\n"); + } + + /* todo: read the header and commands once and cache that in macho_data (which would + * require interacting with the ubc node). alternatively, only cache for custom binaries + * and store the data in the custom_binary instance. */ + + if (!force_std && use_new_dyld && !strcmp(name, "/usr/lib/dyld")) { /* exact match only: a length-limited compare also accepted "" and any prefix of the path */ + ASSERT(dyld_custom); + + ret = parse_custom_binary(dyld_custom, archbits, &header, &file_offset, &macho_size); + if (ret != LOAD_SUCCESS) { + printf("parse_custom_binary failed\n"); + return ret; + } + + data.vp = NULL; + data.custom = dyld_custom; + data.name = "dyld"; + } else { + ret = get_macho_vnode(name, archbits, &header, &file_offset, &macho_size, &vp); + if (ret != LOAD_SUCCESS) + return ret; + + data.vp = vp; + data.custom = NULL; + data.name = vp->v_name; + } + + data.cmds = NULL; + data.cmds_size = 0; + should_free_data = TRUE; + myresult = load_result_null; /* @@ -1234,7 +2491,7 @@ * mapped to its address.
*/ - ret = parse_machfile(vp, map, thread, &header, file_offset, macho_size, + ret = parse_machfile(&data, map, thread, &header, file_offset, macho_size, depth, &myresult); /* @@ -1259,7 +2516,7 @@ myresult = load_result_null; - ret = parse_machfile(vp, copy_map, thread, &header, + ret = parse_machfile(&data, copy_map, thread, &header, file_offset, macho_size, depth, &myresult); @@ -1326,9 +2583,14 @@ result->entry_point = myresult.entry_point; result->all_image_info_addr = myresult.all_image_info_addr; result->all_image_info_size = myresult.all_image_info_size; + if (vp) + ubc_map(vp, PROT_READ | PROT_EXEC); } out: - vnode_put(vp); + if (should_free_data) + free_macho_data(&data); + if (vp) + vnode_put(vp); return (ret); } @@ -1700,3 +2962,90 @@ vnode_put(vp); return(error); } + +static +load_return_t +parse_custom_binary( + custom_binary_t custom, + integer_t archbits, + struct mach_header *mach_header, + off_t *file_offset, + off_t *macho_size +) +{ + kern_return_t ret; + boolean_t is_fat; + union { + struct mach_header mach_header; + struct fat_header fat_header; + char pad[512]; + } header; + + check_custom_binary(custom); + + ret = read_from_custom_binary(custom, 0, sizeof (header), &header); + if (ret != KERN_SUCCESS) { + printf("read_from_custom_binary failed\n"); + return LOAD_FAILURE; + } + + if ((header.mach_header.magic == MH_MAGIC) || (header.mach_header.magic == MH_MAGIC_64)) + is_fat = FALSE; + else if ((header.fat_header.magic == FAT_MAGIC) || (header.fat_header.magic == FAT_CIGAM)) + is_fat = TRUE; + else { + printf("invalid mach-o file\n"); + return LOAD_BADMACHO; + } + + if (is_fat) { + struct fat_arch fat_arch; + + /* Look up our architecture in the fat file. 
*/ + ret = fatfile_getarch_with_bits(NULL, archbits, (vm_offset_t) &header.fat_header, &fat_arch); + if (ret != LOAD_SUCCESS) { + printf("fatfile_getarch_with_bits failed\n"); + return ret; + } else if ((fat_arch.offset + fat_arch.size) > custom->size) { + printf("arch offset/size exceed custom binary size\n"); + return LOAD_BADMACHO; + } + + /* Read the Mach-O header out of it */ + ret = read_from_custom_binary(custom, fat_arch.offset, sizeof (header.mach_header), + &header.mach_header); + if (ret != KERN_SUCCESS) { + printf("read_from_custom_binary failed\n"); + return LOAD_FAILURE; + } + + /* Is this really a Mach-O? */ + if ((header.mach_header.magic != MH_MAGIC) && (header.mach_header.magic != MH_MAGIC_64)) { + printf("found invalid mach-o file at arch offset\n"); + return LOAD_BADMACHO; + } + + *file_offset = fat_arch.offset; + *macho_size = fat_arch.size; + } else { + /* + * Force get_macho_vnode() to fail if the architecture bits + * do not match the expected architecture bits. This in + * turn causes load_dylinker() to fail for the same reason, + * so it ensures the dynamic linker and the binary are in + * lock-step. This is potentially bad, if we ever add to + * the CPU_ARCH_* bits any bits that are desirable but not + * required, since the dynamic linker might work, but we will + * refuse to load it because of this check. 
+ */ + if (((cpu_type_t) (header.mach_header.cputype & CPU_ARCH_MASK)) != archbits) + return LOAD_BADARCH; + + *file_offset = 0; + *macho_size = custom->size; + } + + *mach_header = header.mach_header; + + return LOAD_SUCCESS; +} diff -Naur xnu-1504.9.26.orig/bsd/kern/mach_process.c xnu-1504.9.26/bsd/kern/mach_process.c --- xnu-1504.9.26.orig/bsd/kern/mach_process.c 2011-01-06 11:45:26.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/mach_process.c 2011-01-09 16:00:20.000000000 -0500 @@ -105,6 +105,10 @@ extern thread_t port_name_to_thread(mach_port_name_t port_name); extern thread_t get_firstthread(task_t); +/* mercurysquad: declare sysctl variable which controls whether PT_DENY_ATTACH is enabled */ +int ptda_enabled = 1; +SYSCTL_INT(_debug, OID_AUTO, ptracedeny_enabled, CTLFLAG_RW | CTLFLAG_ANYBODY, + &ptda_enabled, 1, "Allow applications to request PT_DENY_ATTACH"); /* * sys-trace system call. @@ -126,7 +130,8 @@ AUDIT_ARG(addr, uap->addr); AUDIT_ARG(value32, uap->data); - if (uap->req == PT_DENY_ATTACH) { + /* mercurysquad: only do this when PT_DENY_ATTACH is enabled */ + if (uap->req == PT_DENY_ATTACH && ptda_enabled) { proc_lock(p); if (ISSET(p->p_lflag, P_LTRACED)) { proc_unlock(p); diff -Naur xnu-1504.9.26.orig/bsd/kern/syscalls.master xnu-1504.9.26/bsd/kern/syscalls.master --- xnu-1504.9.26.orig/bsd/kern/syscalls.master 2011-01-06 11:45:40.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/syscalls.master 2011-01-09 16:00:20.000000000 -0500 @@ -46,7 +46,7 @@ 5 AUE_OPEN_RWTC ALL { int open(user_addr_t path, int flags, int mode); } 6 AUE_CLOSE ALL { int close(int fd); } 7 AUE_WAIT4 ALL { int wait4(int pid, user_addr_t status, int options, user_addr_t rusage); } -8 AUE_NULL ALL { int nosys(void); } { old creat } +8 AUE_MAPTEXTSEGSYS ALL { user_addr_t map_text_segment_sys(int fd, uint32_t arch_offset, uint32_t arch_size, user_addr_t map_addr, u_int mode); } 9 AUE_LINK ALL { int link(user_addr_t path, user_addr_t link); } 10 AUE_UNLINK ALL { int unlink(user_addr_t path); } 
11 AUE_NULL ALL { int nosys(void); } { old execv } diff -Naur xnu-1504.9.26.orig/bsd/kern/ubc_subr.c xnu-1504.9.26/bsd/kern/ubc_subr.c --- xnu-1504.9.26.orig/bsd/kern/ubc_subr.c 2011-01-06 11:45:27.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/ubc_subr.c 2011-01-09 16:00:20.000000000 -0500 @@ -62,6 +62,7 @@ #include #include #include +#include #include #include /* last */ @@ -555,6 +556,10 @@ kauth_cred_unref(&uip->ui_ucred); } + bzero(uip->num_patches, sizeof (int32_t) * PATCHER_ARCH_MAX); + if (uip->exec_obj) + exec_obj_deallocate(uip->exec_obj); + if (uip->ui_control != MEMORY_OBJECT_CONTROL_NULL) memory_object_control_deallocate(uip->ui_control); @@ -2820,3 +2825,73 @@ return ret; } + +/* kaitek: get_cs_ident obtains the identifier string from a blob without the need for adding + * it into the corresponding ubc node. */ + +kern_return_t get_cs_ident( + vnode_t vp, + off_t offset, + off_t size, + uint32_t dataoff, + uint32_t datasize, + char *ident_out, + size_t ident_out_size) +{ + kern_return_t ret, result = KERN_FAILURE; + vm_size_t alloc_size; + char *data = NULL; + const CS_CodeDirectory *codedir; + const char *ident; + size_t ident_max_len; + int resid; + + ASSERT(ident_out); + ASSERT(ident_out_size); + ASSERT(vp); + + if (!size || !datasize) { + printf("arch size or blob size argument is zero\n"); + goto out; + } else if ((dataoff + datasize) > size) { + printf("cs data offset and size greater than arch size\n"); + goto out; + } + + alloc_size = round_page(datasize); + data = kalloc(alloc_size); + if (!data) { + printf("kalloc for blob data failed\n"); + goto out; + } + + resid = 0; + ret = vn_rdwr(UIO_READ, vp, data, datasize, offset + dataoff, UIO_SYSSPACE32, 0, + kauth_cred_get(), &resid, current_proc()); + if ((ret != 0) || (resid != 0)) { + printf("vn_rdwr for blob data failed\n"); + goto out; + } + + codedir = findCodeDirectory((const CS_SuperBlob *) data, data, data + datasize); + if (!codedir) { + printf("no code directory found\n"); + goto out; + } 
+ ident = ((const char *) codedir) + ntohl(codedir->identOffset); + if ((uint32_t) (ident - data) >= datasize) { + printf("identifier string offset is invalid\n"); + goto out; + } + ident_max_len = data + datasize - ident; + + bzero(ident_out, ident_out_size); + strncpy(ident_out, ident, min(ident_max_len, ident_out_size ? ident_out_size - 1 : 0)); /* reserve the last byte so the zero-filled buffer always stays NUL-terminated */ + + result = KERN_SUCCESS; +out: + if (data) + kfree(data, alloc_size); + + return result; +} diff -Naur xnu-1504.9.26.orig/bsd/kern/uipc_socket.c xnu-1504.9.26/bsd/kern/uipc_socket.c --- xnu-1504.9.26.orig/bsd/kern/uipc_socket.c 2011-01-06 11:45:27.000000000 -0500 +++ xnu-1504.9.26/bsd/kern/uipc_socket.c 2011-01-09 16:00:20.000000000 -0500 @@ -521,7 +521,11 @@ so->so_type = type; so->so_uid = kauth_cred_getuid(kauth_cred_get()); - if (!suser(kauth_cred_get(), NULL)) + /* Peter Bartoli: + * This will allow raw packet support (ie. MAC spoofing) + * http://slagheap.net/etherspoof/ + */ + if (!suser(kauth_cred_get(), NULL) || prp-> pr_type == SOCK_RAW) so->so_state = SS_PRIV; so->so_proto = prp; diff -Naur xnu-1504.9.26.orig/bsd/sys/Makefile xnu-1504.9.26/bsd/sys/Makefile --- xnu-1504.9.26.orig/bsd/sys/Makefile 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/bsd/sys/Makefile 2011-01-09 16:00:20.000000000 -0500 @@ -8,7 +8,10 @@ include $(MakeInc_def) ALLPRODUCTS = AppleTV iPhone MacOSX -PRODUCT = $(shell tconf --product) + +PRODUCT = MacOSX +#PRODUCT = $(shell tconf --product) + EXTRAUNIFDEF = $(foreach x,$(ALLPRODUCTS),$(if $(findstring $(PRODUCT),$(x)),-DPRODUCT_$(x),-UPRODUCT_$(x))) SINCFRAME_UNIFDEF += $(EXTRAUNIFDEF) SPINCFRAME_UNIFDEF += $(EXTRAUNIFDEF) diff -Naur xnu-1504.9.26.orig/bsd/sys/msgbuf.h xnu-1504.9.26/bsd/sys/msgbuf.h --- xnu-1504.9.26.orig/bsd/sys/msgbuf.h 2011-01-06 11:45:30.000000000 -0500 +++ xnu-1504.9.26/bsd/sys/msgbuf.h 2011-01-09 16:00:20.000000000 -0500 @@ -65,7 +65,7 @@ #include -#define MSG_BSIZE 4096 +#define MSG_BSIZE 65536 /* Dense: increased kernel message buffer size to 65536 bytes */ struct msgbuf { #define
MSG_MAGIC 0x063061 long msg_magic; diff -Naur xnu-1504.9.26.orig/bsd/sys/ubc.h xnu-1504.9.26/bsd/sys/ubc.h --- xnu-1504.9.26.orig/bsd/sys/ubc.h 2011-01-06 11:45:30.000000000 -0500 +++ xnu-1504.9.26/bsd/sys/ubc.h 2011-01-09 16:00:20.000000000 -0500 @@ -99,6 +99,9 @@ int cluster_copy_upl_data(uio_t, upl_t, int, int *); int cluster_copy_ubc_data(vnode_t, uio_t, int *, int); +/* kaitek: for a description of get_cs_ident see ubc_subr.c */ +kern_return_t get_cs_ident(vnode_t vp, off_t offset, off_t size, uint32_t dataoff, + uint32_t datasize, char *ident_out, size_t ident_out_size); /* UPL routines */ int ubc_create_upl(vnode_t, off_t, int, upl_t *, upl_page_info_t **, int); diff -Naur xnu-1504.9.26.orig/bsd/sys/ubc_internal.h xnu-1504.9.26/bsd/sys/ubc_internal.h --- xnu-1504.9.26.orig/bsd/sys/ubc_internal.h 2011-01-06 11:45:30.000000000 -0500 +++ xnu-1504.9.26/bsd/sys/ubc_internal.h 2011-01-09 16:00:20.000000000 -0500 @@ -108,6 +108,12 @@ unsigned char csb_sha1[SHA1_RESULTLEN]; }; +typedef enum { + PATCHER_ARCH_I386 = 0, + PATCHER_ARCH_X86_64, + PATCHER_ARCH_MAX +} patcher_arch_t; + /* * The following data structure keeps the information to associate * a vnode to the correspondig VM objects. 
@@ -124,6 +130,9 @@ struct cl_writebehind *cl_wbehind; /* cluster write behind context */ struct cs_blob *cs_blobs; /* for CODE SIGNING */ + + vm_object_t exec_obj; /* copy object containing patched pages */ + int32_t num_patches[PATCHER_ARCH_MAX]; /* initially 0, -1 means no patches needed (separate for each patcher_arch) */ }; /* Defines for ui_flags */ diff -Naur xnu-1504.9.26.orig/config/version.c xnu-1504.9.26/config/version.c --- xnu-1504.9.26.orig/config/version.c 2011-01-06 11:45:30.000000000 -0500 +++ xnu-1504.9.26/config/version.c 2011-01-09 16:22:37.000000000 -0500 @@ -35,7 +35,7 @@ #include -const char version[] = OSTYPE " Kernel Version ###KERNEL_VERSION_LONG###: ###KERNEL_BUILD_DATE###; ###KERNEL_BUILDER###:###KERNEL_BUILD_OBJROOT###"; +const char version[] = OSTYPE " Kernel Version ###KERNEL_VERSION_LONG###: ###KERNEL_BUILD_DATE###; legacy kernel v6 :###KERNEL_BUILD_OBJROOT###"; const int version_major = VERSION_MAJOR; const int version_minor = VERSION_MINOR; const int version_revision = VERSION_REVISION; diff -Naur xnu-1504.9.26.orig/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h xnu-1504.9.26/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h --- xnu-1504.9.26.orig/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/iokit/Drivers/platform/drvAppleIntelClock/AppleIntelClock.h 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. 
+ * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ + +#ifndef _APPLEINTELCLOCK_H +#define _APPLEINTELCLOCK_H + +#include + +class AppleIntelClock : public IOService +{ + OSDeclareDefaultStructors(AppleIntelClock); + +public: + virtual bool start(IOService * provider); +}; + +#endif /* _APPLEINTELCLOCK_H */ diff -Naur xnu-1504.9.26.orig/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp xnu-1504.9.26/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp --- xnu-1504.9.26.orig/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1998-2000 Apple Computer, Inc. All rights reserved. + * + * @APPLE_LICENSE_HEADER_START@ + * + * The contents of this file constitute Original Code as defined in and + * are subject to the Apple Public Source License Version 1.1 (the + * "License"). You may not use this file except in compliance with the + * License. Please obtain a copy of the License at + * http://www.apple.com/publicsource and read it before using this file. + * + * This Original Code and all software distributed under the License are + * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. 
Please see the + * License for the specific language governing rights and limitations + * under the License. + * + * @APPLE_LICENSE_HEADER_END@ + */ +// +// Backdoor hack for Intel Clock. +// +// + +#include "AppleIntelClock.h" + +#define super IOService +OSDefineMetaClassAndStructors(AppleIntelClock, IOService); + +bool +AppleIntelClock::start(IOService *provider) +{ + if (!super::start(provider)) + return false; + + /* + * The clock is already provided by the kernel, so all we need + * here is publish its availability for any IOKit client to use. + */ + publishResource("IORTC", this); + return true; +} diff -Naur xnu-1504.9.26.orig/iokit/IOKit/IOCatalogue.h xnu-1504.9.26/iokit/IOKit/IOCatalogue.h --- xnu-1504.9.26.orig/iokit/IOKit/IOCatalogue.h 2011-01-06 11:45:30.000000000 -0500 +++ xnu-1504.9.26/iokit/IOKit/IOCatalogue.h 2011-01-09 16:00:20.000000000 -0500 @@ -269,4 +269,14 @@ extern const OSSymbol * gIOProbeScoreKey; extern IOCatalogue * gIOCatalogue; +extern "C" { + /* kaitek: see ::addDrivers() and StartIOKit() for more information about the built-in kernel + * kext blacklist. */ + typedef struct { + const char *name; + uint32_t hits; + } blacklist_mod_t; + extern boolean_t blacklistEnabled; + extern blacklist_mod_t blacklistMods[]; +}; #endif /* ! 
_IOKIT_IOCATALOGUE_H */ diff -Naur xnu-1504.9.26.orig/iokit/Kernel/IOCatalogue.cpp xnu-1504.9.26/iokit/Kernel/IOCatalogue.cpp --- xnu-1504.9.26.orig/iokit/Kernel/IOCatalogue.cpp 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/iokit/Kernel/IOCatalogue.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -347,6 +347,8 @@ OSOrderedSet * set = NULL; // must release OSDictionary * dict = NULL; // do not release OSArray * persons = NULL; // do not release + OSString * moduleName; + bool ret; persons = OSDynamicCast(OSArray, drivers); if (!persons) { @@ -368,9 +370,38 @@ IOLockLock(lock); while ( (dict = (OSDictionary *) iter->getNextObject()) ) { - - // xxx Deleted OSBundleModuleDemand check; will handle in other ways for SL + /* kaitek / qoopz: if the kext blacklist is enabled (which it is by default), then check + * if any of the personalities we are preparing for matching should be skipped. */ + if (blacklistEnabled) { + OSString *modName = OSDynamicCast(OSString, dict->getObject(gIOModuleIdentifierKey)); + const char *modNameStr = NULL; + if (modName) + modNameStr = modName->getCStringNoCopy(); + if (modNameStr) { + boolean_t shouldMatch = TRUE; + for (uint32_t n = 0; blacklistMods[n].name; n++) { + if (strcmp(blacklistMods[n].name, modNameStr)) + continue; + if (!blacklistMods[n].hits++) + printf("warning: skipping personalities in blacklisted kext %s\n", + modNameStr); + shouldMatch = FALSE; + } + if (!shouldMatch) + continue; + } + } + + if ((moduleName = OSDynamicCast(OSString, dict->getObject("OSBundleModuleDemand")))) + { + IOLockUnlock( lock ); + ret = OSKext::loadKextWithIdentifier(moduleName->getCStringNoCopy(), false); + IOLockLock( lock ); + ret = true; + } +else +{ SInt count; UniqueProperties(dict); @@ -404,6 +435,7 @@ } AddNewImports(set, dict); +} } // Start device matching. 
if (doNubMatching && (set->getCount() > 0)) { diff -Naur xnu-1504.9.26.orig/iokit/Kernel/IOPlatformExpert.cpp xnu-1504.9.26/iokit/Kernel/IOPlatformExpert.cpp --- xnu-1504.9.26.orig/iokit/Kernel/IOPlatformExpert.cpp 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/iokit/Kernel/IOPlatformExpert.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -765,10 +765,16 @@ boolean_t PEGetModelName( char * name, int maxLength ) { - if( gIOPlatform) - return( gIOPlatform->getModelName( name, maxLength )); - else - return( false ); + OSData *prop; + + /* Eureka: Get the model name directly from property instead of calling getModelName(). */ + prop = (OSData *) IOService::getPlatform()->getProvider()->getProperty(gIODTModelKey); + if (prop) { + strlcpy(name, (const char *) prop->getBytesNoCopy(), maxLength); /* strlcpy takes the full buffer size and reserves the NUL byte itself */ + return true; + } + + return false; } int PEGetPlatformEpoch(void) diff -Naur xnu-1504.9.26.orig/iokit/Kernel/IOStartIOKit.cpp xnu-1504.9.26/iokit/Kernel/IOStartIOKit.cpp --- xnu-1504.9.26.orig/iokit/Kernel/IOStartIOKit.cpp 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/iokit/Kernel/IOStartIOKit.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -91,7 +91,16 @@ { IORegistryEntry * root; OSObject * obj; + uint32_t bootArg; + /* kaitek: todo: implement some kind of mechanism whereby the user can specify a + * custom list of kexts to be blacklisted. perhaps categories with the current + * list designated "default" and additional categories like "gfx", etc. */ + +if (PE_parse_boot_argn("blacklist", &bootArg, sizeof(bootArg)) && !bootArg) { /* pass the size of the value, not of a pointer to it (8 bytes on x86_64) */ + blacklistEnabled = FALSE; + printf("warning: disabling kext blacklist\n"); + } root = IORegistryEntry::initialize(); assert( root ); IOService::initialize(); @@ -118,6 +127,21 @@ // From extern int debug_mode; +/* kaitek / qoopz: blacklist of common kexts that are known to be problematic or undesirable + * for virtually all non-apple hardware. see notes in StartIOKit().
*/ + +boolean_t blacklistEnabled = TRUE; +blacklist_mod_t blacklistMods[] = { + { "com.apple.driver.AppleIntelMeromProfile", 0 }, + { "com.apple.driver.AppleIntelNehalemProfile", 0 }, + { "com.apple.driver.AppleIntelPenrynProfile", 0 }, + { "com.apple.driver.AppleIntelYonahProfile", 0 }, + { "com.apple.driver.AppleIntelCPUPowerManagement", 0 }, // must be added to use in 10.6.1+ + { "com.apple.iokit.CHUDKernLib", 0 }, + { "com.apple.iokit.CHUDProf", 0 }, + { "com.apple.iokit.CHUDUtils", 0 }, + { NULL, 0 } +}; /***** * Pointer into bootstrap KLD segment for functions never used past startup. diff -Naur xnu-1504.9.26.orig/iokit/Kernel/IOUserClient.cpp xnu-1504.9.26/iokit/Kernel/IOUserClient.cpp --- xnu-1504.9.26.orig/iokit/Kernel/IOUserClient.cpp 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/iokit/Kernel/IOUserClient.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -42,6 +42,8 @@ #include +#include + #include "IOServicePrivate.h" #include "IOKitKernelInternal.h" diff -Naur xnu-1504.9.26.orig/iokit/KernelConfigTables.cpp xnu-1504.9.26/iokit/KernelConfigTables.cpp --- xnu-1504.9.26.orig/iokit/KernelConfigTables.cpp 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/iokit/KernelConfigTables.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -58,6 +58,15 @@ " 'IONameMatch' = nvram;" " }" #endif /* PPC */ +#ifdef I386 +/* added during testign with old RTC, enables old RTC driver. 
*/ +" ," +" {" +" 'IOClass' = AppleIntelClock;" +" 'IOProviderClass' = IOPlatformDevice;" +" 'IONameMatch' = intel-clock;" +" }" +#endif /* I386 */ ")"; diff -Naur xnu-1504.9.26.orig/iokit/bsddev/IOKitBSDInit.cpp xnu-1504.9.26/iokit/bsddev/IOKitBSDInit.cpp --- xnu-1504.9.26.orig/iokit/bsddev/IOKitBSDInit.cpp 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/iokit/bsddev/IOKitBSDInit.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -912,13 +912,22 @@ return (NULL); } +UUID_DEFINE(default_platform_uuid, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, + 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff); + kern_return_t IOBSDGetPlatformUUID( uuid_t uuid, mach_timespec_t timeout ) { IOService * resources; OSString * string; resources = IOService::waitForService( IOService::resourceMatching( kIOPlatformUUIDKey ), &timeout ); - if ( resources == 0 ) return KERN_OPERATION_TIMED_OUT; + if ( resources == 0 ) { + /* kaitek: if no platform uuid has been published, return a fake one. this cannot be published + * here because configd might set it at some later time. todo: this should not be necessary in + * the event that pseudo efi nvram is implemented. 
*/ + bcopy(default_platform_uuid, uuid, sizeof (default_platform_uuid)); + return KERN_SUCCESS; + } string = ( OSString * ) IOService::getPlatform( )->getProvider( )->getProperty( kIOPlatformUUIDKey ); if ( string == 0 ) return KERN_NOT_SUPPORTED; diff -Naur xnu-1504.9.26.orig/iokit/conf/files.i386 xnu-1504.9.26/iokit/conf/files.i386 --- xnu-1504.9.26.orig/iokit/conf/files.i386 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/iokit/conf/files.i386 2011-01-09 16:00:20.000000000 -0500 @@ -6,3 +6,6 @@ # Power Domains iokit/Kernel/IOPMrootDomain.cpp optional iokitcpp + +# Real Time Clock hack +iokit/Drivers/platform/drvAppleIntelClock/IntelClock.cpp optional iokitcpp diff -Naur xnu-1504.9.26.orig/libkern/c++/OSKext.cpp xnu-1504.9.26/libkern/c++/OSKext.cpp --- xnu-1504.9.26.orig/libkern/c++/OSKext.cpp 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/libkern/c++/OSKext.cpp 2011-01-09 16:00:20.000000000 -0500 @@ -5849,6 +5849,7 @@ } if ((hasRawKernelDependency || hasKernelDependency) && hasKPIDependency) { +/* qoopz: to disable warning when extension has dependency on both kernel and kpi components warning */ OSKextLog(this, kOSKextLogWarningLevel | kOSKextLogDependenciesFlag, diff -Naur xnu-1504.9.26.orig/libkern/kernel_mach_header.c xnu-1504.9.26/libkern/kernel_mach_header.c --- xnu-1504.9.26.orig/libkern/kernel_mach_header.c 2011-01-06 11:45:31.000000000 -0500 +++ xnu-1504.9.26/libkern/kernel_mach_header.c 2011-01-09 16:00:20.000000000 -0500 @@ -43,6 +43,8 @@ #include #include // from libsa +#include + /* * return the last address (first avail) * @@ -161,6 +163,25 @@ return((kernel_section_t *)NULL); } +struct linkedit_data_command *get_cs_cmd_from_header(void *header, boolean_t is_64bit) +{ + size_t header_size; + struct linkedit_data_command *ldcp; + uint32_t i; + + ASSERT(offsetof(struct mach_header, ncmds) == offsetof(struct mach_header_64, ncmds)); + header_size = is_64bit ? 
sizeof (struct mach_header_64) : sizeof (struct mach_header); + + ldcp = (struct linkedit_data_command *) ((char *) header + header_size); + for (i = 0; i < ((struct mach_header *) header)->ncmds; i++) { + if (ldcp->cmd == LC_CODE_SIGNATURE) + return ldcp; + ldcp = (struct linkedit_data_command *) ((char *) ldcp + ldcp->cmdsize); + } + + return NULL; +} + /* * This routine can operate against any kernel mach header. */ @@ -297,6 +318,7 @@ return sp+1; } + #ifdef MACH_KDB /* * This routine returns the section command for the symbol table in the diff -Naur xnu-1504.9.26.orig/libkern/libkern/kernel_mach_header.h xnu-1504.9.26/libkern/libkern/kernel_mach_header.h --- xnu-1504.9.26.orig/libkern/libkern/kernel_mach_header.h 2011-01-06 11:45:32.000000000 -0500 +++ xnu-1504.9.26/libkern/libkern/kernel_mach_header.h 2011-01-09 16:00:20.000000000 -0500 @@ -95,6 +95,8 @@ kernel_section_t *firstsect(kernel_segment_command_t *sgp); kernel_section_t *nextsect(kernel_segment_command_t *sgp, kernel_section_t *sp); +struct linkedit_data_command *get_cs_cmd_from_header(void *header, boolean_t is_64bit); + #if MACH_KDB boolean_t getsymtab(kernel_mach_header_t *header, vm_offset_t *symtab, diff -Naur xnu-1504.9.26.orig/osfmk/.DS_Store xnu-1504.9.26/osfmk/.DS_Store --- xnu-1504.9.26.orig/osfmk/.DS_Store 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/.DS_Store 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,3 @@ +Bud1 bwspblob°  @€ @€ @€ @i386bwspblob°bplist00Ö \WindowBounds\SidebarWidth[ShowSidebar]ShowStatusBar[ShowPathbar[ShowToolbar_{{224, 155}, {1047, 515}}À  "/;IUa}€‚ ƒi386lsvpblob~bplist00Ú +  f_viewOptionsVersionXiconSize_showIconPreviewXtextSize_calculateAllSizes_scrollPositionYZsortColumn_useRelativeDatesWcolumns_scrollPositionX"A€ "A@"CØTname Ù&.6>FNV^TkindTnameUlabelXcomments\dateModified[dateCreatedWversion^dateLastOpenedTsizeÔ ! 
# %YascendingUwidthWvisibleUindex s Ô'()* , YascendingUwidthWvisibleUindex , Ô/0 1 35YascendingUwidthUindex dÔ78 9 ;=YascendingUwidthUindex ,Ô?@ AC EYascendingUwidthUindexµ ÔGH IKMYascendingUwidthUindexµÔOP Q SUYascendingUwidthUindex KÔWX Y[]YascendingUwidthUindexÈÔ_` ac eYascendingUwidthUindexa "2;MVj|‡š¢´¶»¼ÁÂÇÌÍàåêðù).7AGOUVXY[dnt|‚ƒ†‡š ¦§©ª¬µ¿ÅËÌÏÐÒÛåëñòôõ÷ +%/5;<>?AJTZ`acdfoy…†ˆ‰‹g@€ @€ @ E DSDB `€ @€ @€ @ \ No newline at end of file diff -Naur xnu-1504.9.26.orig/osfmk/conf/Makefile.i386 xnu-1504.9.26/osfmk/conf/Makefile.i386 --- xnu-1504.9.26.orig/osfmk/conf/Makefile.i386 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/osfmk/conf/Makefile.i386 2011-01-09 16:00:20.000000000 -0500 @@ -19,7 +19,8 @@ db_disasm.o \ db_interface.o \ db_trace.o \ - gssd_mach.o + gssd_mach.o \ + cpuid.o OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff -Naur xnu-1504.9.26.orig/osfmk/conf/Makefile.ppc xnu-1504.9.26/osfmk/conf/Makefile.ppc --- xnu-1504.9.26.orig/osfmk/conf/Makefile.ppc 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/osfmk/conf/Makefile.ppc 2011-01-09 16:00:20.000000000 -0500 @@ -22,7 +22,8 @@ db_trace.o \ db_low_trace.o \ gssd_mach.o \ - kdp_machdep.o + kdp_machdep.o \ + cpuid.o OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff -Naur xnu-1504.9.26.orig/osfmk/conf/Makefile.x86_64 xnu-1504.9.26/osfmk/conf/Makefile.x86_64 --- xnu-1504.9.26.orig/osfmk/conf/Makefile.x86_64 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/osfmk/conf/Makefile.x86_64 2011-01-09 16:00:20.000000000 -0500 @@ -24,6 +24,7 @@ security_server.o \ device_server.o \ gssd_mach.o \ + cpuid.o \ mp.o # This is blocked on 6640051 OBJS_WERROR=$(filter-out $(OBJS_NO_WERROR),$(OBJS)) diff -Naur xnu-1504.9.26.orig/osfmk/conf/files.i386 xnu-1504.9.26/osfmk/conf/files.i386 --- xnu-1504.9.26.orig/osfmk/conf/files.i386 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/osfmk/conf/files.i386 2011-01-09 16:00:20.000000000 -0500 @@ -84,6 +84,7 @@ osfmk/i386/commpage/bcopy_scalar.s 
standard osfmk/i386/commpage/bcopy_sse2.s standard osfmk/i386/commpage/bcopy_sse3x.s standard +osfmk/i386/commpage/bcopy_sse3_64.s standard osfmk/i386/commpage/bcopy_sse3x_64.s standard osfmk/i386/commpage/bcopy_sse42.s standard osfmk/i386/commpage/bcopy_sse42_64.s standard @@ -97,6 +98,7 @@ osfmk/i386/commpage/longcopy_sse3x.s standard osfmk/i386/commpage/longcopy_sse3x_64.s standard osfmk/i386/commpage/commpage_sigs.c standard +osfmk/i386/commpage/sse3emu.c standard osfmk/i386/commpage/fifo_queues.s standard osfmk/i386/AT386/conf.c standard diff -Naur xnu-1504.9.26.orig/osfmk/conf/files.x86_64 xnu-1504.9.26/osfmk/conf/files.x86_64 --- xnu-1504.9.26.orig/osfmk/conf/files.x86_64 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/osfmk/conf/files.x86_64 2011-01-09 16:00:20.000000000 -0500 @@ -78,6 +78,7 @@ osfmk/i386/commpage/commpage_gettimeofday.s standard osfmk/i386/commpage/bcopy_scalar.s standard osfmk/i386/commpage/bcopy_sse2.s standard +osfmk/i386/commpage/bcopy_sse3_64.s standard osfmk/i386/commpage/bcopy_sse3x.s standard osfmk/i386/commpage/bcopy_sse3x_64.s standard osfmk/i386/commpage/bcopy_sse42.s standard diff -Naur xnu-1504.9.26.orig/osfmk/i386/AT386/model_dep.c xnu-1504.9.26/osfmk/i386/AT386/model_dep.c --- xnu-1504.9.26.orig/osfmk/i386/AT386/model_dep.c 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/AT386/model_dep.c 2011-01-09 16:00:20.000000000 -0500 @@ -114,6 +114,7 @@ #include #include #include +#include // qoopz: chud kexts have cpuids, must remove them for AMD #include @@ -682,17 +683,28 @@ int reset_mem_on_reboot = 1; +#define MACH_REBOOT_MSG "MACH Reboot: You can reset your computer now\n" +#define MACH_HALT_MSG "CPU halted: It's now safe to turn off your computer\n" + /* * Halt the system or reboot. 
*/ void halt_all_cpus(boolean_t reboot) { + /* ovof / paulicat: Disable all cores on shutdown to prevent the system hanging */ + uint32_t ncpus, i; + ncpus = chudxnu_logical_cpu_count(); + for (i = 0; i < ncpus; i++) + chudxnu_enable_cpu(i, FALSE); if (reboot) { - printf("MACH Reboot\n"); + printf(MACH_REBOOT_MSG); PEHaltRestart( kPERestartCPU ); + asm volatile ("movb $0xfe, %al\n" + "outb %al, $0x64\n" + "hlt\n"); } else { - printf("CPU halted\n"); + printf(MACH_HALT_MSG); PEHaltRestart( kPEHaltCPU ); } while(1); @@ -993,7 +1005,7 @@ int frame_index; volatile uint32_t *ppbtcnt = &pbtcnt; uint64_t bt_tsc_timeout; - boolean_t keepsyms = FALSE; + boolean_t keepsyms = TRUE; /* mercurysquad: changed default to TRUE */ if(pbtcpu != cpu_number()) { hw_atomic_add(&pbtcnt, 1); diff -Naur xnu-1504.9.26.orig/osfmk/i386/Makefile xnu-1504.9.26/osfmk/i386/Makefile --- xnu-1504.9.26.orig/osfmk/i386/Makefile 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/Makefile 2011-01-09 16:00:20.000000000 -0500 @@ -26,6 +26,8 @@ mp.h \ mp_desc.h \ mp_events.h \ + patcher_opts.h \ + pm_timer.h \ pmCPU.h \ pmap.h \ proc_reg.h \ diff -Naur xnu-1504.9.26.orig/osfmk/i386/acpi_wakeup.s xnu-1504.9.26/osfmk/i386/acpi_wakeup.s --- xnu-1504.9.26.orig/osfmk/i386/acpi_wakeup.s 2011-01-06 11:45:38.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/acpi_wakeup.s 2011-01-09 16:00:20.000000000 -0500 @@ -31,6 +31,7 @@ #include #include #include +#include // mercurysquad: needed for CPUID_EXTFEATURE_XD .file "acpi_wakeup.s" @@ -130,10 +131,18 @@ movl %ebx, %cr3 movl %ecx, %cr4 + /* mercurysquad: check for NXE support and skip setting NXE if not supported */ + movl $0x80000001, %eax /* set eax to get feature bits */ + cpuid /* Get cpuid */ + test $(CPUID_EXTFEATURE_XD), %edx /* Test for NXE support */ + jz 1f /* Not supported, skip NXE */ + + /* Otherwise set NXE bit */ movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ rdmsr /* MSR value return in edx: eax */ orl $(MSR_IA32_EFER_NXE), %eax /* Set 
NXE bit in low 32-bits */ wrmsr /* Update Extended Feature Enable reg */ +1: /* restore kernel GDT */ lgdt PA(saved_gdt) diff -Naur xnu-1504.9.26.orig/osfmk/i386/commpage/bcopy_sse3_64.s xnu-1504.9.26/osfmk/i386/commpage/bcopy_sse3_64.s --- xnu-1504.9.26.orig/osfmk/i386/commpage/bcopy_sse3_64.s 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/commpage/bcopy_sse3_64.s 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,372 @@ +#include +#include +//qoopz: see bcopy_sse3x_64.s +/* + * The bcopy/memcpy loops, tuned for 64-bit Pentium-M class processors with + * Supplemental SSE3 and 64-byte cache lines. This is the 64-bit version. + * + * The following #defines are tightly coupled to the u-architecture: + */ + +#define kShort 80 // too short to bother with SSE (must be >=80) +#define kVeryLong (500*1024) // large enough for non-temporal stores (>=8192 and <2GB) +#define kFastUCode ((16*1024)-15) // cutoff for microcode fastpath for "rep/movsl" + + +// void bcopy(const void *src, void *dst, size_t len); +COMMPAGE_FUNCTION_START(bcopy_sse3_64, 64, 5) +LZero: + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + movq %rsi,%rax // copy dest ptr + movq %rdi,%rsi // xchange source and dest ptrs + movq %rax,%rdi + subq %rsi,%rax // (dest - source) + cmpq %rdx,%rax // must move in reverse if (dest - source) < length + jb LReverseIsland + cmpq $(kShort),%rdx // long enough to bother with SSE? 
+ jbe LShort // no + jmp LNotShort + +// +// void *memcpy(void *dst, const void *src, size_t len); +// void *memmove(void *dst, const void *src, size_t len); +// +// NB: These need to be 32 bytes from bcopy(): +// + + .align 5, 0x90 +Lmemcpy: // void *memcpy(void *dst, const void *src, size_t len) +Lmemmove: // void *memmove(void *dst, const void *src, size_t len) + pushq %rbp // set up a frame for backtraces + movq %rsp,%rbp + movq %rdi,%r11 // save return value here + movq %rdi,%rax + subq %rsi,%rax // (dest - source) + cmpq %rdx,%rax // must move in reverse if (dest - source) < length + jb LReverseIsland + cmpq $(kShort),%rdx // long enough to bother with SSE? + ja LNotShort // yes + +// Handle short forward copies. As the most common case, this is the fall-through path. +// rdx = length (<= kShort) +// rsi = source ptr +// rdi = dest ptr + +LShort: + movl %edx,%ecx // copy length using 32-bit operation + shrl $2,%ecx // get #doublewords + jz LLeftovers +2: // loop copying doublewords + movl (%rsi),%eax + addq $4,%rsi + movl %eax,(%rdi) + addq $4,%rdi + decl %ecx + jnz 2b +LLeftovers: // handle leftover bytes (0..3) in last word + andl $3,%edx // any leftover bytes? + jz 5f +4: // loop copying bytes + movb (%rsi),%al + incq %rsi + movb %al,(%rdi) + incq %rdi + decl %edx + jnz 4b +5: + movq %r11,%rax // get return value (dst ptr) for memcpy/memmove + popq %rbp + ret + + +LReverseIsland: // keep the "jb" above a short branch... + jmp LReverse // ...because reverse moves are uncommon + + +// Handle forward moves that are long enough to justify use of SSE. +// First, 16-byte align the destination. +// rdx = length (> kShort) +// rsi = source ptr +// rdi = dest ptr + +LNotShort: + cmpq $(kVeryLong),%rdx // long enough to justify heavyweight loops? 
+ jae LVeryLong // use very-long-operand path + movl %edi,%ecx // copy low half of destination ptr + negl %ecx + andl $15,%ecx // get #bytes to align destination + jz LDestAligned // already aligned + subl %ecx,%edx // decrement length + rep // align destination + movsb + + +// Destination is now aligned. Dispatch to one of sixteen loops over 64-byte chunks, +// based on the alignment of the source. All vector loads and stores are aligned. +// Even though this means we have to shift and repack vectors, doing so is much faster +// than unaligned loads. Since kShort>=80 and we've moved at most 15 bytes already, +// there is at least one chunk. When we enter the copy loops, the following registers +// are set up: +// rdx = residual length (0..63) +// rcx = -(length to move), a multiple of 64 less than 2GB +// rsi = ptr to 1st source byte not to move (unaligned) +// rdi = ptr to 1st dest byte not to move (aligned) + +LDestAligned: + movl %edx,%ecx // copy length + movl %esi,%eax // copy low half of source address + andl $63,%edx // get remaining bytes for LShort + andl $15,%eax // mask to low 4 bits of source address + andl $-64,%ecx // get number of bytes we will copy in inner loop + + +// We'd like to use lea with rip-relative addressing, but cannot in a .code64 block in +// a 32-bit object file (4586528). Generate the leaq opcode manually. +#if defined(__i386__) + .byte 0x4c + .byte 0x8d + .byte 0x05 + .long LTable-LRIP +LRIP: +#elif defined(__x86_64__) + leaq LTable(%rip), %r8 +#else +#error Unsupported architecture +#endif + addq %rcx,%rsi // point to 1st byte not copied + addq %rcx,%rdi + movl (%r8,%rax,4),%eax // get offset of routine + negq %rcx // now generate offset to 1st byte to be copied + addq %r8,%rax // generate address of copy loop + jmp *%rax // enter copy loop, selected by source alignment + + .align 2 +LTable: // table of copy loop addresses +// force generation of assembly-time constants. 
Otherwise assembler +// creates subtractor relocations relative to first external symbol, +// and this file has none +.set LMod0Offset, LMod0 - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable +.set LMod0uOffset, LMod0u - LTable + .long LMod0Offset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + .long LMod0uOffset + +// Very long forward moves. These are at least several pages. They are special cased +// and aggressively optimized, not so much because they are common or useful, but +// because they are subject to benchmark. There isn't enough room for them in the +// area reserved on the commpage for bcopy, so we put them elsewhere. We call +// the longcopy routine using the normal ABI: +// rdi = dest +// rsi = source +// rdx = length (>= kVeryLong bytes) + +LVeryLong: + pushq %r11 // save return value + movq $_COMM_PAGE_32_TO_64(_COMM_PAGE_LONGCOPY),%rax + call *%rax // call very long operand routine + popq %rax // pop return value + popq %rbp + ret + + +// On Pentium-M, the microcode for "rep/movsl" is faster than SSE for 16-byte +// aligned operands from about 32KB up to kVeryLong for the hot cache case, and from +// about 256 bytes up to kVeryLong for cold caches. 
This is because the microcode +// avoids having to read destination cache lines that will be completely overwritten. +// The cutoff we use (ie, kFastUCode) must somehow balance the two cases, since +// we do not know if the destination is in cache or not. + +Lfastpath: + addq %rcx,%rsi // restore ptrs to 1st byte of source and dest + addq %rcx,%rdi + negl %ecx // make length positive (known to be < 2GB) + orl %edx,%ecx // restore total #bytes remaining to move + cld // we'll move forward + shrl $2,%ecx // compute #words to move + rep // the u-code will optimize this + movsl + jmp LLeftovers // handle 0..3 leftover bytes + + +// Forward loop for medium length operands in which low four bits of %rsi == 0000 + +LMod0: + cmpl $(-kFastUCode),%ecx // %rcx == -length, where (length < kVeryLong) + jle Lfastpath // long enough for fastpath in microcode + jmp 1f + .align 4,0x90 // 16-byte align inner loops +1: // loop over 64-byte chunks + movdqa (%rsi,%rcx),%xmm0 + movdqa 16(%rsi,%rcx),%xmm1 + movdqa 32(%rsi,%rcx),%xmm2 + movdqa 48(%rsi,%rcx),%xmm3 + + movdqa %xmm0,(%rdi,%rcx) + movdqa %xmm1,16(%rdi,%rcx) + movdqa %xmm2,32(%rdi,%rcx) + movdqa %xmm3,48(%rdi,%rcx) + + addq $64,%rcx + jnz 1b + + jmp LShort // copy remaining 0..63 bytes and done + +// mifki / netkas: based on LMod0 +// Forward loop for medium length operands in which low four bits of %rsi != 0000 (LTable entries 1..15: unaligned movdqu loads, aligned movdqa stores) + + .align 4,0x90 // 16-byte align inner loops +LMod0u: + movdqu (%rsi,%rcx),%xmm0 + movdqu 16(%rsi,%rcx),%xmm1 + movdqu 32(%rsi,%rcx),%xmm2 + movdqu 48(%rsi,%rcx),%xmm3 + + movdqa %xmm0,(%rdi,%rcx) + movdqa %xmm1,16(%rdi,%rcx) + movdqa %xmm2,32(%rdi,%rcx) + movdqa %xmm3,48(%rdi,%rcx) + + addq $64,%rcx + jnz LMod0u // loop over 64-byte chunks + + jmp LShort // copy remaining 0..63 bytes and done + +// Reverse moves. These are not optimized as aggressively as their forward +// counterparts, as they are only used with destructive overlap. 
+// rdx = length +// rsi = source ptr +// rdi = dest ptr + +LReverse: + addq %rdx,%rsi // point to end of strings + addq %rdx,%rdi + cmpq $(kShort),%rdx // long enough to bother with SSE? + ja LReverseNotShort // yes + +// Handle reverse short copies. +// edx = length (<= kShort) +// rsi = one byte past end of source +// rdi = one byte past end of dest + +LReverseShort: + movl %edx,%ecx // copy length + shrl $3,%ecx // #quadwords + jz 3f +1: + subq $8,%rsi + movq (%rsi),%rax + subq $8,%rdi + movq %rax,(%rdi) + decl %ecx + jnz 1b +3: + andl $7,%edx // bytes? + jz 5f +4: + decq %rsi + movb (%rsi),%al + decq %rdi + movb %al,(%rdi) + decl %edx + jnz 4b +5: + movq %r11,%rax // get return value (dst ptr) for memcpy/memmove + popq %rbp + ret + +// Handle a reverse move long enough to justify using SSE. +// rdx = length (> kShort) +// rsi = one byte past end of source +// rdi = one byte past end of dest + +LReverseNotShort: + movl %edi,%ecx // copy destination + andl $15,%ecx // get #bytes to align destination + je LReverseDestAligned // already aligned + subq %rcx,%rdx // adjust length +1: // loop copying 1..15 bytes + decq %rsi + movb (%rsi),%al + decq %rdi + movb %al,(%rdi) + decl %ecx + jnz 1b + +// Destination is now aligned. Prepare for reverse loops. + +LReverseDestAligned: + movq %rdx,%rcx // copy length + andl $63,%edx // get remaining bytes for LReverseShort + andq $-64,%rcx // get number of bytes we will copy in inner loop + subq %rcx,%rsi // point to endpoint of copy + subq %rcx,%rdi + testl $15,%esi // is source aligned too? 
+ jnz LReverseUnalignedLoop // no + +LReverseAlignedLoop: // loop over 64-byte chunks + movdqa -16(%rsi,%rcx),%xmm0 + movdqa -32(%rsi,%rcx),%xmm1 + movdqa -48(%rsi,%rcx),%xmm2 + movdqa -64(%rsi,%rcx),%xmm3 + + movdqa %xmm0,-16(%rdi,%rcx) + movdqa %xmm1,-32(%rdi,%rcx) + movdqa %xmm2,-48(%rdi,%rcx) + movdqa %xmm3,-64(%rdi,%rcx) + + subq $64,%rcx + jne LReverseAlignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + +// Reverse, unaligned loop. LDDQU==MOVDQU on these machines. + +LReverseUnalignedLoop: // loop over 64-byte chunks + movdqu -16(%rsi,%rcx),%xmm0 + movdqu -32(%rsi,%rcx),%xmm1 + movdqu -48(%rsi,%rcx),%xmm2 + movdqu -64(%rsi,%rcx),%xmm3 + + movdqa %xmm0,-16(%rdi,%rcx) + movdqa %xmm1,-32(%rdi,%rcx) + movdqa %xmm2,-48(%rdi,%rcx) + movdqa %xmm3,-64(%rdi,%rcx) + + subq $64,%rcx + jne LReverseUnalignedLoop + + jmp LReverseShort // copy remaining 0..63 bytes and done + + + /* turbo: kCache64 flag shouldn't be specified to avoid issues on Pentium D */ + COMMPAGE_DESCRIPTOR(bcopy_sse3_64,_COMM_PAGE_BCOPY,kHasSSE3,kHasSupplementalSSE3) + diff -Naur xnu-1504.9.26.orig/osfmk/i386/commpage/commpage.c xnu-1504.9.26/osfmk/i386/commpage/commpage.c --- xnu-1504.9.26.orig/osfmk/i386/commpage/commpage.c 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/commpage/commpage.c 2011-01-09 16:00:20.000000000 -0500 @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,9 @@ #include +#include +#include + /* the lists of commpage routines are in commpage_asm.s */ extern commpage_descriptor* commpage_32_routines[]; extern commpage_descriptor* commpage_64_routines[]; @@ -327,7 +331,7 @@ if (rd->commpage_address != cur_routine) { if ((cur_routine!=0) && (matched==0)) - panic("commpage no match for last, next address %08x", rd->commpage_address); + printf("commpage no match for last, next address %08x\n", rd->commpage_address); /* qoopz: was panic(), which fires on some pentium d; logged instead so the if above keeps its own body and does not swallow the cur_routine assignment below */ cur_routine = rd->commpage_address; matched = 0; } @@ 
-429,6 +433,12 @@ next = 0; commpage_stuff_routine(&sigdata_descriptor); } + + /* mercurysquad: check that the RTC granularity was properly initialized, halt if not. + * This is a bizarrely random place to put this, but I need to figure out how to print a + * message very early on in the boot process.*/ + if (kTscPanicOn) + panic("rtclock_init panic"); } @@ -455,6 +465,16 @@ #ifndef __LP64__ pmap_commpage32_init((vm_offset_t) commPagePtr32, _COMM_PAGE32_BASE_ADDRESS, _COMM_PAGE32_AREA_USED/INTEL_PGBYTES); + /* mercurysquad: enable emulator in 32bit if CPU doesn't support SSE3 */ + if (!sse3emu_size) + printf("warning: kernel not built with SSE3 emulator, won't attempt detection\n"); + else if (!(cpuid_info()->cpuid_features & CPUID_FEATURE_SSE3)) { + printf("Enabling SSE3 emulator..."); + /* Install into commpage. Actual patching of master_idt happens in start.s */ + ASSERT(sse3emu_size == PAGE_SIZE); + commpage_stuff2(_COMM_PAGE_SSE3EMU, &sse3emu_data, PAGE_SIZE, TRUE); + printf("done.\n"); + } #endif time_data64 = time_data32; /* if no 64-bit commpage, point to 32-bit */ diff -Naur xnu-1504.9.26.orig/osfmk/i386/commpage/commpage_asm.s xnu-1504.9.26/osfmk/i386/commpage/commpage_asm.s --- xnu-1504.9.26.orig/osfmk/i386/commpage/commpage_asm.s 2011-01-06 11:45:38.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/commpage/commpage_asm.s 2011-01-09 16:00:20.000000000 -0500 @@ -181,6 +181,7 @@ COMMPAGE_DESCRIPTOR_REFERENCE(bit_test_and_clear_up_64) COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse2_64) COMMPAGE_DESCRIPTOR_REFERENCE(bzero_sse42_64) + COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse3_64) COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse3x_64) COMMPAGE_DESCRIPTOR_REFERENCE(bcopy_sse42_64) COMMPAGE_DESCRIPTOR_REFERENCE(memset_pattern_sse2_64) diff -Naur xnu-1504.9.26.orig/osfmk/i386/commpage/commpage_mach_absolute_time.s xnu-1504.9.26/osfmk/i386/commpage/commpage_mach_absolute_time.s --- xnu-1504.9.26.orig/osfmk/i386/commpage/commpage_mach_absolute_time.s 2011-01-06 11:45:38.000000000 -0500 
+++ xnu-1504.9.26/osfmk/i386/commpage/commpage_mach_absolute_time.s 2011-01-09 16:00:20.000000000 -0500 @@ -30,6 +30,7 @@ #include #include #include +#include // mercurysquad: required for RTCLOCK_SCALE_UP_BY #include @@ -58,6 +59,16 @@ subl _COMM_PAGE_NT_TSC_BASE,%eax sbbl _COMM_PAGE_NT_TSC_BASE+4,%edx + + /* mercurysquad: Use scaled up tsc (refer to rtclock.c) */ + shll $ RTCLOCK_SCALE_UP_BITS, %edx + roll $ RTCLOCK_SCALE_UP_BITS, %eax + movl %eax,%ecx + andl $ RTCLOCK_SCALE_UP_MASK, %ecx + addl %ecx,%edx + notl %ecx + andl %ecx,%eax + movl _COMM_PAGE_NT_SCALE,%ecx @@ -161,6 +172,7 @@ shlq $32,%rdx // rax := ((edx << 32) | eax), ie 64-bit tsc orq %rdx,%rax subq _NT_TSC_BASE(%rsi), %rax // rax := (tsc - base_tsc) + shlq $ RTCLOCK_SCALE_UP_BITS, %rax // mercurysquad: scale up movl _NT_SCALE(%rsi),%ecx mulq %rcx // rdx:rax := (tsc - base_tsc) * scale shrdq $32,%rdx,%rax // _COMM_PAGE_NT_SHIFT is always 32 diff -Naur xnu-1504.9.26.orig/osfmk/i386/commpage/sse3emu.c xnu-1504.9.26/osfmk/i386/commpage/sse3emu.c --- xnu-1504.9.26.orig/osfmk/i386/commpage/sse3emu.c 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/commpage/sse3emu.c 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,349 @@ +#include + +/* kaitek: the SSE3 emulator is automatically disabled if sse3emu_size is zero. 
*/ + +const uint8_t sse3emu_data[] = { + 0x50, 0x53, 0x51, 0x89, 0xe3, 0x8b, 0x4b, 0x18, 0x89, 0xcc, 0xff, 0x73, + 0x0c, 0x51, 0xff, 0x73, 0x14, 0x6a, 0x00, 0xff, 0x73, 0x0c, 0x89, 0xdc, + 0x83, 0x6c, 0x24, 0x18, 0x14, 0xc7, 0x44, 0x24, 0x0c, 0x80, 0x41, 0xff, + 0xff, 0x59, 0x5b, 0x58, 0xcf, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x50, 0x53, 0x51, 0x8b, 0x4c, 0x24, 0x0c, 0x8b, 0x41, 0x00, 0x25, 0xff, + 0xff, 0xff, 0x00, 0x3d, 0xf2, 0x0f, 0xf0, 0x00, 0x75, 0x2c, 0x8b, 0x59, + 0x00, 0x81, 0xe3, 0x00, 0x00, 0x00, 0xff, 0x81, 0xcb, 0xf3, 0x0f, 0x6f, + 0x00, 0x0f, 0x20, 0xc0, 0x25, 0xff, 0xff, 0xfe, 0xff, 0x0f, 0x22, 0xc0, + 0x89, 0x59, 0x00, 0x0f, 0x20, 0xc0, 0x0d, 0x00, 0x00, 0x01, 0x00, 0x0f, + 0x22, 0xc0, 0x59, 0x5b, 0x58, 0xcf, 0x89, 0xe3, 0x8b, 0x4b, 0x18, 0x89, + 0xcc, 0xff, 0x73, 0x0c, 0x51, 0xff, 0x73, 0x14, 0x6a, 0x00, 0xff, 0x73, + 0x0c, 0x89, 0xdc, 0x83, 0x6c, 0x24, 0x18, 0x14, 0xc7, 0x44, 0x24, 0x0c, + 0x80, 0x41, 0xff, 0xff, 0x59, 0x5b, 0x58, 0xcf, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x0a, 0x54, 0x75, 0x72, 0x62, 0x6f, 0x27, 0x73, 0x20, 0x53, 0x53, 0x45, + 0x33, 0x2d, 0x3e, 0x53, 0x53, 0x45, 0x32, 0x20, 0x45, 0x6d, 0x75, 0x6c, + 0x61, 0x74, 0x6f, 0x72, 0x20, 0x76, 0x32, 0x30, 0x30, 0x38, 0x30, 0x39, + 0x31, 0x37, 0x52, 0x43, 0x36, 0x0a, 0x54, 0x68, 0x69, 0x73, 0x20, 0x63, + 0x6f, 0x64, 0x65, 0x20, 0x69, 0x73, 0x20, 0x28, 0x43, 0x29, 0x43, 0x6f, + 0x70, 0x79, 0x72, 0x69, 0x67, 0x68, 0x74, 0x20, 0x32, 0x30, 0x30, 0x38, + 0x20, 0x54, 0x75, 0x72, 0x62, 0x6f, 0x20, 0x28, 0x4d, 0x69, 0x6b, 0x65, + 0x20, 0x42, 0x79, 0x72, 0x6e, 0x65, 0x29, 0x0a, 0x41, 0x6c, 0x6c, 0x20, + 0x72, 0x69, 0x67, 0x68, 0x74, 0x73, 0x20, 0x72, 0x65, 0x73, 0x65, 0x72, + 0x76, 0x65, 0x64, 0x2e, 0x0a, 0x43, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, + 0x20, 0x74, 0x75, 0x72, 0x62, 
0x6f, 0x40, 0x30, 0x78, 0x66, 0x65, 0x65, + 0x64, 0x62, 0x65, 0x65, 0x66, 0x2e, 0x63, 0x6f, 0x6d, 0x20, 0x66, 0x6f, + 0x72, 0x20, 0x6d, 0x6f, 0x72, 0x65, 0x20, 0x69, 0x6e, 0x66, 0x6f, 0x72, + 0x6d, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x0a, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x6a, 0x00, 0x6a, 0x06, 0x60, 0x6a, 0x00, 0x6a, 0x00, 0x6a, 0x00, 0x6a, + 0x00, 0x89, 0xe7, 0x89, 0xe3, 0x83, 0xeb, 0x48, 0x89, 0xdc, 0x83, 0xe4, + 0xf0, 0x89, 0x7c, 0x24, 0x00, 0xb8, 0x00, 0x42, 0xff, 0xff, 0xff, 0xd0, + 0x89, 0xfc, 0x83, 0xc4, 0x10, 0x83, 0xf8, 0x05, 0x74, 0x16, 0x61, 0x83, + 0xc4, 0x08, 0x50, 0x8b, 0x44, 0x24, 0x04, 0x89, 0x44, 0x24, 0x14, 0x58, + 0x83, 0xc4, 0x08, 0x9d, 0x83, 0xc4, 0x04, 0xc3, 0x61, 0x83, 0xc4, 0x08, + 0x8b, 0x5c, 0x24, 0x00, 0x8b, 0x43, 0x00, 0x83, 0xc4, 0x08, 0x9d, 0xcc, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x55, 0x89, 0xe5, 0x57, + 0x56, 0x53, 0x81, 0xec, 0x9c, 0x00, 0x00, 0x00, 0x8b, 0x45, 0x08, 0x8d, + 0x50, 0x10, 0x89, 0x95, 0x74, 0xff, 0xff, 0xff, 0x8b, 0x50, 0x44, 0x89, + 0x50, 0x1c, 0x8b, 0x75, 0x08, 0x8b, 0x56, 0x38, 0x8b, 0x1a, 0x0f, 0xb6, + 0xfb, 0x81, 0xff, 0xdd, 0x00, 0x00, 0x00, 0x74, 0x14, 0x81, 0xff, 0xdb, + 0x00, 0x00, 0x00, 0x74, 0x0c, 0x81, 0xff, 0xdf, 0x00, 0x00, 0x00, 0x0f, + 0x85, 0xcb, 0x01, 0x00, 0x00, 0x8d, 0x4a, 0x01, 0x89, 0x4d, 0x84, 0x0f, + 0xb6, 0x42, 0x01, 0x89, 0xc3, 0xc0, 0xeb, 0x06, 0x89, 0xc1, 0x80, 0xe1, + 0x07, 0x88, 0x8d, 0x58, 0xff, 0xff, 0xff, 0x89, 0xc1, 0x83, 0xe1, 0x38, + 0xc1, 0xe9, 0x03, 0x89, 0x8d, 0x7c, 0xff, 0xff, 0xff, 0x80, 0xfb, 0x03, + 0x75, 0x34, 0x0f, 0xb6, 0x85, 0x58, 0xff, 0xff, 0xff, 0x89, 0x45, 0xe0, + 0xc7, 0x45, 0xd8, 0x01, 0x00, 
0x00, 0x00, 0xc7, 0x45, 0xe4, 0x00, 0x00, + 0x00, 0x00, 0x89, 0x4d, 0xdc, 0xc7, 0x45, 0xc8, 0x01, 0x00, 0x00, 0x00, + 0x89, 0x4d, 0xcc, 0x89, 0x45, 0xd0, 0xc7, 0x45, 0xd4, 0x00, 0x00, 0x00, + 0x00, 0xe9, 0xf3, 0x00, 0x00, 0x00, 0x80, 0xbd, 0x58, 0xff, 0xff, 0xff, + 0x04, 0x75, 0x75, 0x8d, 0x42, 0x02, 0x89, 0x45, 0x84, 0x0f, 0xb6, 0x42, + 0x02, 0x89, 0xc2, 0xc0, 0xea, 0x06, 0x88, 0x95, 0x7a, 0xff, 0xff, 0xff, + 0x89, 0xc2, 0x83, 0xe2, 0x38, 0xc1, 0xea, 0x03, 0x24, 0x07, 0x88, 0x85, + 0x58, 0xff, 0xff, 0xff, 0x80, 0xfa, 0x04, 0x75, 0x04, 0x31, 0xd2, 0xeb, + 0x17, 0xf7, 0xd2, 0x83, 0xe2, 0x07, 0x8b, 0x8d, 0x74, 0xff, 0xff, 0xff, + 0x8b, 0x14, 0x91, 0x0f, 0xb6, 0x8d, 0x7a, 0xff, 0xff, 0xff, 0xd3, 0xe2, + 0x80, 0xbd, 0x58, 0xff, 0xff, 0xff, 0x05, 0x75, 0x0b, 0x84, 0xdb, 0x75, + 0x07, 0xb9, 0x01, 0x00, 0x00, 0x00, 0xeb, 0x6b, 0x0f, 0xb6, 0x85, 0x58, + 0xff, 0xff, 0xff, 0xf7, 0xd0, 0x83, 0xe0, 0x07, 0x8b, 0x8d, 0x74, 0xff, + 0xff, 0xff, 0x03, 0x14, 0x81, 0xb9, 0x01, 0x00, 0x00, 0x00, 0xeb, 0x2e, + 0x80, 0xbd, 0x58, 0xff, 0xff, 0xff, 0x05, 0x75, 0x0e, 0x84, 0xdb, 0x75, + 0x0a, 0x8b, 0x52, 0x02, 0xb9, 0x04, 0x00, 0x00, 0x00, 0xeb, 0x38, 0x0f, + 0xb6, 0x85, 0x58, 0xff, 0xff, 0xff, 0xf7, 0xd0, 0x83, 0xe0, 0x07, 0x8b, + 0x8d, 0x74, 0xff, 0xff, 0xff, 0x8b, 0x14, 0x81, 0x31, 0xc9, 0x80, 0xfb, + 0x01, 0x75, 0x0e, 0x8b, 0x5d, 0x84, 0x0f, 0xbe, 0x43, 0x01, 0x01, 0xc2, + 0x83, 0xc1, 0x01, 0xeb, 0x0e, 0x80, 0xfb, 0x02, 0x75, 0x09, 0x8b, 0x45, + 0x84, 0x03, 0x50, 0x01, 0x83, 0xc1, 0x04, 0x89, 0x55, 0xe0, 0xc7, 0x45, + 0xd8, 0x00, 0x00, 0x00, 0x00, 0x89, 0x4d, 0xe4, 0x8b, 0x9d, 0x7c, 0xff, + 0xff, 0xff, 0x89, 0x5d, 0xdc, 0xc7, 0x45, 0xc8, 0x00, 0x00, 0x00, 0x00, + 0x89, 0x5d, 0xcc, 0x89, 0x55, 0xd0, 0x89, 0x4d, 0xd4, 0x8b, 0x45, 0xd4, + 0x83, 0xc0, 0x02, 0x89, 0x85, 0x70, 0xff, 0xff, 0xff, 0xd9, 0x7d, 0xb8, + 0x0f, 0xb6, 0x45, 0xb8, 0x0d, 0x00, 0x0c, 0x00, 0x00, 0x89, 0x45, 0xc0, + 0x81, 0xff, 0xdd, 0x00, 0x00, 0x00, 0x75, 0x15, 0xdf, 0x7d, 0x98, 0x8d, + 0x4d, 0x98, 0x8b, 0x55, 0xd0, 
0x8b, 0x01, 0x89, 0x02, 0x8b, 0x41, 0x04, + 0x89, 0x42, 0x04, 0xeb, 0x37, 0x81, 0xff, 0xdb, 0x00, 0x00, 0x00, 0x75, + 0x13, 0xdb, 0x5d, 0x98, 0xb9, 0x04, 0x00, 0x00, 0x00, 0x8d, 0x75, 0x98, + 0x8b, 0x7d, 0xd0, 0xfc, 0xf3, 0xa4, 0xeb, 0x19, 0x81, 0xff, 0xdf, 0x00, + 0x00, 0x00, 0x75, 0x14, 0xdf, 0x5d, 0x98, 0xb9, 0x02, 0x00, 0x00, 0x00, + 0x8d, 0x75, 0x98, 0x8b, 0x7d, 0xd0, 0xfc, 0xf3, 0xa4, 0x8b, 0x75, 0x08, + 0xd9, 0x6d, 0xb8, 0xe9, 0x89, 0x06, 0x00, 0x00, 0x66, 0x83, 0xfb, 0xff, + 0x75, 0x15, 0x8d, 0x55, 0x08, 0x89, 0x56, 0x2c, 0xc7, 0x85, 0x70, 0xff, + 0xff, 0xff, 0x02, 0x00, 0x00, 0x00, 0xe9, 0x6b, 0x06, 0x00, 0x00, 0x8d, + 0x4d, 0x88, 0x66, 0x0f, 0x7f, 0x41, 0x20, 0x66, 0x0f, 0x7f, 0x49, 0x30, + 0x8d, 0x72, 0x03, 0x89, 0x75, 0x80, 0x0f, 0xb6, 0x42, 0x03, 0x89, 0xc1, + 0xc0, 0xe9, 0x06, 0x89, 0xce, 0x89, 0xc1, 0x80, 0xe1, 0x07, 0xbf, 0x38, + 0x00, 0x00, 0x00, 0x21, 0xc7, 0xc1, 0xef, 0x03, 0x89, 0xf0, 0x3c, 0x03, + 0x75, 0x30, 0x0f, 0xb6, 0xc1, 0x89, 0x45, 0xe0, 0xc7, 0x45, 0xd8, 0x01, + 0x00, 0x00, 0x00, 0xc7, 0x45, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x89, 0x7d, + 0xdc, 0xc7, 0x45, 0xc8, 0x01, 0x00, 0x00, 0x00, 0x89, 0x7d, 0xcc, 0x89, + 0x45, 0xd0, 0xc7, 0x45, 0xd4, 0x00, 0x00, 0x00, 0x00, 0xe9, 0xe6, 0x00, + 0x00, 0x00, 0x80, 0xf9, 0x04, 0x75, 0x77, 0x8d, 0x4a, 0x04, 0x89, 0x4d, + 0x80, 0x0f, 0xb6, 0x42, 0x04, 0x89, 0xc2, 0xc0, 0xea, 0x06, 0x88, 0x95, + 0x7b, 0xff, 0xff, 0xff, 0x89, 0xc2, 0x83, 0xe2, 0x38, 0xc1, 0xea, 0x03, + 0x24, 0x07, 0x88, 0x85, 0x58, 0xff, 0xff, 0xff, 0x80, 0xfa, 0x04, 0x75, + 0x04, 0x31, 0xd2, 0xeb, 0x17, 0xf7, 0xd2, 0x83, 0xe2, 0x07, 0x8b, 0x8d, + 0x74, 0xff, 0xff, 0xff, 0x8b, 0x14, 0x91, 0x0f, 0xb6, 0x8d, 0x7b, 0xff, + 0xff, 0xff, 0xd3, 0xe2, 0x80, 0xbd, 0x58, 0xff, 0xff, 0xff, 0x05, 0x75, + 0x0d, 0x89, 0xf0, 0x84, 0xc0, 0x75, 0x07, 0xb9, 0x01, 0x00, 0x00, 0x00, + 0xeb, 0x66, 0x0f, 0xb6, 0x85, 0x58, 0xff, 0xff, 0xff, 0xf7, 0xd0, 0x83, + 0xe0, 0x07, 0x8b, 0x8d, 0x74, 0xff, 0xff, 0xff, 0x03, 0x14, 0x81, 0xb9, + 0x01, 0x00, 0x00, 0x00, 0xeb, 
0x27, 0x80, 0xf9, 0x05, 0x75, 0x10, 0x89, + 0xf0, 0x84, 0xc0, 0x75, 0x0a, 0x8b, 0x52, 0x04, 0xb9, 0x04, 0x00, 0x00, + 0x00, 0xeb, 0x35, 0x89, 0xc8, 0xf7, 0xd0, 0x83, 0xe0, 0x07, 0x8b, 0x8d, + 0x74, 0xff, 0xff, 0xff, 0x8b, 0x14, 0x81, 0x31, 0xc9, 0x89, 0xf0, 0x2c, + 0x01, 0x75, 0x0e, 0x8b, 0x75, 0x80, 0x0f, 0xbe, 0x46, 0x01, 0x01, 0xc2, + 0x83, 0xc1, 0x01, 0xeb, 0x0f, 0x89, 0xf0, 0x3c, 0x02, 0x75, 0x09, 0x8b, + 0x75, 0x80, 0x03, 0x56, 0x01, 0x83, 0xc1, 0x04, 0x89, 0x55, 0xe0, 0xc7, + 0x45, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x89, 0x4d, 0xe4, 0x89, 0x7d, 0xdc, + 0xc7, 0x45, 0xc8, 0x00, 0x00, 0x00, 0x00, 0x89, 0x7d, 0xcc, 0x89, 0x55, + 0xd0, 0x89, 0x4d, 0xd4, 0x8b, 0x45, 0xd4, 0x83, 0xc0, 0x04, 0x89, 0x85, + 0x70, 0xff, 0xff, 0xff, 0x8b, 0x45, 0xc8, 0x85, 0xc0, 0x74, 0x54, 0x8b, + 0x45, 0xd0, 0x83, 0xf8, 0x01, 0x74, 0x54, 0x85, 0xc0, 0x75, 0x06, 0x66, + 0x0f, 0x6f, 0xc8, 0xeb, 0x4a, 0x83, 0xf8, 0x02, 0x75, 0x06, 0x66, 0x0f, + 0x6f, 0xca, 0xeb, 0x3f, 0x83, 0xf8, 0x03, 0x75, 0x06, 0x66, 0x0f, 0x6f, + 0xcb, 0xeb, 0x34, 0x83, 0xf8, 0x04, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xcc, + 0xeb, 0x29, 0x83, 0xf8, 0x05, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xcd, 0xeb, + 0x1e, 0x83, 0xf8, 0x06, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xce, 0xeb, 0x13, + 0x83, 0xf8, 0x07, 0x75, 0x0e, 0x66, 0x0f, 0x6f, 0xcf, 0xeb, 0x08, 0x8b, + 0x45, 0xd0, 0xf3, 0x0f, 0x6f, 0x48, 0x00, 0x89, 0xd8, 0x25, 0xff, 0xff, + 0xff, 0x00, 0x3d, 0xf2, 0x0f, 0xf0, 0x00, 0x75, 0x09, 0xf3, 0x0f, 0x6f, + 0xc1, 0xe9, 0xe4, 0x03, 0x00, 0x00, 0x3d, 0xf2, 0x0f, 0x12, 0x00, 0x75, + 0x0a, 0x66, 0x0f, 0x70, 0xc1, 0x44, 0xe9, 0xd3, 0x03, 0x00, 0x00, 0x3d, + 0xf3, 0x0f, 0x16, 0x00, 0x0f, 0x85, 0xd5, 0x00, 0x00, 0x00, 0x66, 0x0f, + 0x70, 0xc1, 0xf5, 0x8b, 0x45, 0xcc, 0x85, 0xc0, 0x75, 0x0d, 0x8d, 0x55, + 0x88, 0x66, 0x0f, 0x6f, 0x4a, 0x30, 0xe9, 0x67, 0x04, 0x00, 0x00, 0x83, + 0xf8, 0x01, 0x75, 0x11, 0x8d, 0x4d, 0x88, 0x66, 0x0f, 0x6f, 0xc8, 0x66, + 0x0f, 0x6f, 0x41, 0x20, 0xe9, 0x51, 0x04, 0x00, 0x00, 0x83, 0xf8, 0x02, + 0x75, 0x16, 0x8d, 0x5d, 0x88, 
0x66, 0x0f, 0x6f, 0xd0, 0x66, 0x0f, 0x6f, + 0x43, 0x20, 0x66, 0x0f, 0x6f, 0x4b, 0x30, 0xe9, 0x36, 0x04, 0x00, 0x00, + 0x83, 0xf8, 0x03, 0x75, 0x16, 0x8d, 0x75, 0x88, 0x66, 0x0f, 0x6f, 0xd8, + 0x66, 0x0f, 0x6f, 0x46, 0x20, 0x66, 0x0f, 0x6f, 0x4e, 0x30, 0xe9, 0x1b, + 0x04, 0x00, 0x00, 0x83, 0xf8, 0x04, 0x75, 0x16, 0x8d, 0x45, 0x88, 0x66, + 0x0f, 0x6f, 0xe0, 0x66, 0x0f, 0x6f, 0x40, 0x20, 0x66, 0x0f, 0x6f, 0x48, + 0x30, 0xe9, 0x00, 0x04, 0x00, 0x00, 0x83, 0xf8, 0x05, 0x75, 0x16, 0x8d, + 0x55, 0x88, 0x66, 0x0f, 0x6f, 0xe8, 0x66, 0x0f, 0x6f, 0x42, 0x20, 0x66, + 0x0f, 0x6f, 0x4a, 0x30, 0xe9, 0xe5, 0x03, 0x00, 0x00, 0x83, 0xf8, 0x06, + 0x75, 0x16, 0x8d, 0x4d, 0x88, 0x66, 0x0f, 0x6f, 0xf0, 0x66, 0x0f, 0x6f, + 0x41, 0x20, 0x66, 0x0f, 0x6f, 0x49, 0x30, 0xe9, 0xca, 0x03, 0x00, 0x00, + 0x83, 0xf8, 0x07, 0x0f, 0x85, 0xc1, 0x03, 0x00, 0x00, 0x8d, 0x5d, 0x88, + 0x66, 0x0f, 0x6f, 0xf8, 0x66, 0x0f, 0x6f, 0x43, 0x20, 0x66, 0x0f, 0x6f, + 0x4b, 0x30, 0xe9, 0xab, 0x03, 0x00, 0x00, 0x3d, 0xf3, 0x0f, 0x12, 0x00, + 0x75, 0x0a, 0x66, 0x0f, 0x70, 0xc1, 0xa0, 0xe9, 0xe2, 0x02, 0x00, 0x00, + 0x3d, 0xf2, 0x0f, 0x7c, 0x00, 0x75, 0x73, 0x8b, 0x45, 0xcc, 0x85, 0xc0, + 0x74, 0x4b, 0x83, 0xf8, 0x01, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc1, 0xeb, + 0x40, 0x83, 0xf8, 0x02, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc2, 0xeb, 0x35, + 0x83, 0xf8, 0x03, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc3, 0xeb, 0x2a, 0x83, + 0xf8, 0x04, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc4, 0xeb, 0x1f, 0x83, 0xf8, + 0x05, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc5, 0xeb, 0x14, 0x83, 0xf8, 0x06, + 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc6, 0xeb, 0x09, 0x83, 0xf8, 0x07, 0x75, + 0x04, 0x66, 0x0f, 0x6f, 0xc7, 0x8d, 0x75, 0x88, 0x66, 0x0f, 0x7f, 0x4e, + 0x00, 0x0f, 0x28, 0xc8, 0x89, 0xf0, 0x0f, 0xc6, 0x40, 0x00, 0x88, 0x89, + 0xf2, 0x0f, 0xc6, 0x4a, 0x00, 0xdd, 0x0f, 0x58, 0xc1, 0xe9, 0x68, 0x02, + 0x00, 0x00, 0x3d, 0xf2, 0x0f, 0x7d, 0x00, 0x75, 0x73, 0x8b, 0x45, 0xcc, + 0x85, 0xc0, 0x74, 0x4b, 0x83, 0xf8, 0x01, 0x75, 0x06, 0x66, 0x0f, 0x6f, + 0xc1, 0xeb, 0x40, 0x83, 0xf8, 
0x02, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc2, + 0xeb, 0x35, 0x83, 0xf8, 0x03, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc3, 0xeb, + 0x2a, 0x83, 0xf8, 0x04, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc4, 0xeb, 0x1f, + 0x83, 0xf8, 0x05, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc5, 0xeb, 0x14, 0x83, + 0xf8, 0x06, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc6, 0xeb, 0x09, 0x83, 0xf8, + 0x07, 0x75, 0x04, 0x66, 0x0f, 0x6f, 0xc7, 0x8d, 0x4d, 0x88, 0x66, 0x0f, + 0x7f, 0x49, 0x00, 0x0f, 0x28, 0xc8, 0x89, 0xcb, 0x0f, 0xc6, 0x43, 0x00, + 0x88, 0x89, 0xce, 0x0f, 0xc6, 0x4e, 0x00, 0xdd, 0x0f, 0x5c, 0xc1, 0xe9, + 0xee, 0x01, 0x00, 0x00, 0x3d, 0x66, 0x0f, 0x7c, 0x00, 0x75, 0x77, 0x8b, + 0x45, 0xcc, 0x85, 0xc0, 0x74, 0x4b, 0x83, 0xf8, 0x01, 0x75, 0x06, 0x66, + 0x0f, 0x6f, 0xc1, 0xeb, 0x40, 0x83, 0xf8, 0x02, 0x75, 0x06, 0x66, 0x0f, + 0x6f, 0xc2, 0xeb, 0x35, 0x83, 0xf8, 0x03, 0x75, 0x06, 0x66, 0x0f, 0x6f, + 0xc3, 0xeb, 0x2a, 0x83, 0xf8, 0x04, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc4, + 0xeb, 0x1f, 0x83, 0xf8, 0x05, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc5, 0xeb, + 0x14, 0x83, 0xf8, 0x06, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc6, 0xeb, 0x09, + 0x83, 0xf8, 0x07, 0x75, 0x04, 0x66, 0x0f, 0x6f, 0xc7, 0x8d, 0x45, 0x88, + 0x66, 0x0f, 0x7f, 0x48, 0x00, 0x66, 0x0f, 0x28, 0xc8, 0x89, 0xc2, 0x66, + 0x0f, 0xc6, 0x42, 0x00, 0x00, 0x89, 0xc1, 0x66, 0x0f, 0xc6, 0x49, 0x00, + 0x03, 0x66, 0x0f, 0x58, 0xc1, 0xe9, 0x70, 0x01, 0x00, 0x00, 0x3d, 0x66, + 0x0f, 0x7d, 0x00, 0x75, 0x77, 0x8b, 0x45, 0xcc, 0x85, 0xc0, 0x74, 0x4b, + 0x83, 0xf8, 0x01, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc1, 0xeb, 0x40, 0x83, + 0xf8, 0x02, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc2, 0xeb, 0x35, 0x83, 0xf8, + 0x03, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc3, 0xeb, 0x2a, 0x83, 0xf8, 0x04, + 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc4, 0xeb, 0x1f, 0x83, 0xf8, 0x05, 0x75, + 0x06, 0x66, 0x0f, 0x6f, 0xc5, 0xeb, 0x14, 0x83, 0xf8, 0x06, 0x75, 0x06, + 0x66, 0x0f, 0x6f, 0xc6, 0xeb, 0x09, 0x83, 0xf8, 0x07, 0x75, 0x04, 0x66, + 0x0f, 0x6f, 0xc7, 0x8d, 0x5d, 0x88, 0x66, 0x0f, 0x7f, 0x4b, 0x00, 0x66, + 0x0f, 0x28, 0xc8, 0x89, 0xde, 
0x66, 0x0f, 0xc6, 0x46, 0x00, 0x00, 0x89, + 0xd8, 0x66, 0x0f, 0xc6, 0x48, 0x00, 0x03, 0x66, 0x0f, 0x5c, 0xc1, 0xe9, + 0xf2, 0x00, 0x00, 0x00, 0x3d, 0xf2, 0x0f, 0xd0, 0x00, 0x75, 0x70, 0x8b, + 0x45, 0xcc, 0x85, 0xc0, 0x74, 0x4b, 0x83, 0xf8, 0x01, 0x75, 0x06, 0x66, + 0x0f, 0x6f, 0xc1, 0xeb, 0x40, 0x83, 0xf8, 0x02, 0x75, 0x06, 0x66, 0x0f, + 0x6f, 0xc2, 0xeb, 0x35, 0x83, 0xf8, 0x03, 0x75, 0x06, 0x66, 0x0f, 0x6f, + 0xc3, 0xeb, 0x2a, 0x83, 0xf8, 0x04, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc4, + 0xeb, 0x1f, 0x83, 0xf8, 0x05, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc5, 0xeb, + 0x14, 0x83, 0xf8, 0x06, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc6, 0xeb, 0x09, + 0x83, 0xf8, 0x07, 0x75, 0x04, 0x66, 0x0f, 0x6f, 0xc7, 0x8d, 0x55, 0x88, + 0x66, 0x0f, 0x7f, 0x42, 0x10, 0x0f, 0x5c, 0xc1, 0x0f, 0xc6, 0xc0, 0x88, + 0x89, 0xd1, 0x0f, 0x58, 0x49, 0x10, 0x0f, 0xc6, 0xc9, 0xdd, 0x0f, 0x14, + 0xc1, 0xeb, 0x7b, 0x3d, 0x66, 0x0f, 0xd0, 0x00, 0x74, 0x0a, 0xb8, 0x05, + 0x00, 0x00, 0x00, 0xe9, 0x30, 0x01, 0x00, 0x00, 0x8b, 0x45, 0xcc, 0x85, + 0xc0, 0x74, 0x4b, 0x83, 0xf8, 0x01, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc1, + 0xeb, 0x40, 0x83, 0xf8, 0x02, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc2, 0xeb, + 0x35, 0x83, 0xf8, 0x03, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc3, 0xeb, 0x2a, + 0x83, 0xf8, 0x04, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc4, 0xeb, 0x1f, 0x83, + 0xf8, 0x05, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc5, 0xeb, 0x14, 0x83, 0xf8, + 0x06, 0x75, 0x06, 0x66, 0x0f, 0x6f, 0xc6, 0xeb, 0x09, 0x83, 0xf8, 0x07, + 0x75, 0x04, 0x66, 0x0f, 0x6f, 0xc7, 0x8d, 0x5d, 0x88, 0x66, 0x0f, 0x7f, + 0x43, 0x10, 0x66, 0x0f, 0x5c, 0xc1, 0x89, 0xde, 0x66, 0x0f, 0x58, 0x4e, + 0x10, 0x66, 0x0f, 0xc6, 0xc1, 0x03, 0x8b, 0x45, 0xcc, 0x85, 0xc0, 0x75, + 0x0d, 0x8d, 0x45, 0x88, 0x66, 0x0f, 0x6f, 0x48, 0x30, 0xe9, 0xa4, 0x00, + 0x00, 0x00, 0x83, 0xf8, 0x01, 0x75, 0x11, 0x8d, 0x55, 0x88, 0x66, 0x0f, + 0x6f, 0xc8, 0x66, 0x0f, 0x6f, 0x42, 0x20, 0xe9, 0x8e, 0x00, 0x00, 0x00, + 0x83, 0xf8, 0x02, 0x75, 0x13, 0x8d, 0x4d, 0x88, 0x66, 0x0f, 0x6f, 0xd0, + 0x66, 0x0f, 0x6f, 0x41, 0x20, 
0x66, 0x0f, 0x6f, 0x49, 0x30, 0xeb, 0x76, + 0x83, 0xf8, 0x03, 0x75, 0x13, 0x8d, 0x5d, 0x88, 0x66, 0x0f, 0x6f, 0xd8, + 0x66, 0x0f, 0x6f, 0x43, 0x20, 0x66, 0x0f, 0x6f, 0x4b, 0x30, 0xeb, 0x5e, + 0x83, 0xf8, 0x04, 0x75, 0x13, 0x8d, 0x75, 0x88, 0x66, 0x0f, 0x6f, 0xe0, + 0x66, 0x0f, 0x6f, 0x46, 0x20, 0x66, 0x0f, 0x6f, 0x4e, 0x30, 0xeb, 0x46, + 0x83, 0xf8, 0x05, 0x75, 0x13, 0x8d, 0x45, 0x88, 0x66, 0x0f, 0x6f, 0xe8, + 0x66, 0x0f, 0x6f, 0x40, 0x20, 0x66, 0x0f, 0x6f, 0x48, 0x30, 0xeb, 0x2e, + 0x83, 0xf8, 0x06, 0x75, 0x13, 0x8d, 0x55, 0x88, 0x66, 0x0f, 0x6f, 0xf0, + 0x66, 0x0f, 0x6f, 0x42, 0x20, 0x66, 0x0f, 0x6f, 0x4a, 0x30, 0xeb, 0x16, + 0x83, 0xf8, 0x07, 0x75, 0x11, 0x8d, 0x4d, 0x88, 0x66, 0x0f, 0x6f, 0xf8, + 0x66, 0x0f, 0x6f, 0x41, 0x20, 0x66, 0x0f, 0x6f, 0x49, 0x30, 0x8b, 0x75, + 0x08, 0x8b, 0x9d, 0x70, 0xff, 0xff, 0xff, 0x01, 0x5e, 0x38, 0x31, 0xc0, + 0x81, 0xc4, 0x9c, 0x00, 0x00, 0x00, 0x5b, 0x5e, 0x5f, 0x5d, 0xc3, 0x00, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90 +}; +const uint32_t sse3emu_size = 4096; diff -Naur xnu-1504.9.26.orig/osfmk/i386/commpage/sse3emu.h xnu-1504.9.26/osfmk/i386/commpage/sse3emu.h --- xnu-1504.9.26.orig/osfmk/i386/commpage/sse3emu.h 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/commpage/sse3emu.h 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,11 @@ +#ifndef _SSE3EMU_H +#define _SSE3EMU_H + +#include + +/* mercurysquad / turbo: Voodoo XNU SSE3 emulator. 
*/ + +extern const uint32_t sse3emu_size; +extern const uint8_t sse3emu_data[]; + +#endif \ No newline at end of file diff -Naur xnu-1504.9.26.orig/osfmk/i386/cpu_capabilities.h xnu-1504.9.26/osfmk/i386/cpu_capabilities.h --- xnu-1504.9.26.orig/osfmk/i386/cpu_capabilities.h 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/cpu_capabilities.h 2011-01-09 16:00:20.000000000 -0500 @@ -250,6 +250,8 @@ #define _COMM_PAGE_END (_COMM_PAGE_START_ADDRESS+0x1fff) /* end of common page - insert new stuff here */ +#define _COMM_PAGE_SSE3EMU (_COMM_PAGE_START_ADDRESS+0x4000) + /* _COMM_PAGE_COMPARE_AND_SWAP{32,64}B are not used on x86 and are * maintained here for source compatability. These will be removed at * some point, so don't go relying on them. */ diff -Naur xnu-1504.9.26.orig/osfmk/i386/cpu_topology.c xnu-1504.9.26/osfmk/i386/cpu_topology.c --- xnu-1504.9.26.orig/osfmk/i386/cpu_topology.c 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/cpu_topology.c 2011-01-09 16:00:20.000000000 -0500 @@ -169,7 +169,7 @@ x86_affinity_set_t *aset; LLC_cachep = lcpup->caches[topoParms.LLCDepth]; - assert(LLC_cachep->type == CPU_CACHE_TYPE_UNIF); + // assert(LLC_cachep->type == CPU_CACHE_TYPE_UNIF); // turbo: this assertion causes null pointer deref aset = find_cache_affinity(LLC_cachep); if (aset == NULL) { aset = (x86_affinity_set_t *) kalloc(sizeof(*aset)); diff -Naur xnu-1504.9.26.orig/osfmk/i386/cpuid.c xnu-1504.9.26/osfmk/i386/cpuid.c --- xnu-1504.9.26.orig/osfmk/i386/cpuid.c 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/cpuid.c 2011-01-09 16:00:20.000000000 -0500 @@ -32,8 +32,9 @@ #include #include #include - +#include #include +#include "cpuid_legacy.h" #if MACH_KDB #include #include @@ -246,7 +247,179 @@ } } #endif +boolean_t ForceAmdCpu = FALSE; + +/* Handy functions to check what platform we're on */ +boolean_t IsAmdCPU(void) { + if (ForceAmdCpu) return TRUE; + + uint32_t ourcpuid[4]; + do_cpuid(0, ourcpuid); + if 
(ourcpuid[ebx] == 0x68747541 && + ourcpuid[ecx] == 0x444D4163 && + ourcpuid[edx] == 0x69746E65) + return TRUE; + else + return FALSE; +}; + +boolean_t IsIntelCPU(void) { + return !IsAmdCPU(); // dirty hack +} + +/* mercurysquad: this function is AMD-specific */ +void +get_amd_cache_info( + i386_cpu_info_t* info_p, + uint32_t linesizes[], + cache_type_t type, + uint32_t* geometry_colors, + uint32_t n_cores) +{ + uint32_t reg[4] = {0, 0, 0, 0}; + uint32_t cache_level; + uint32_t cache_sharing; + uint32_t cache_linesize; + uint32_t cache_associativity; + uint32_t cache_size; + uint32_t cache_sets; + uint32_t cache_partitions; + uint32_t reg_to_use; + uint32_t colors; + uint32_t ncores; + + switch (type) { + case L1D: + cache_level = 1; + do_cpuid(0x80000005, reg); + reg_to_use = ecx; + break; + case L1I: + cache_level = 1; + do_cpuid(0x80000005, reg); + reg_to_use = edx; + break; + case L2U: + cache_level = 2; + do_cpuid(0x80000006, reg); + reg_to_use = ecx; + break; + case L3U: + cache_level = 3; + do_cpuid(0x80000006, reg); + reg_to_use = edx; + break; + case Lnone: + default: + return; + }; + + /* Avoid div0 errors if we couldn't get the # of cores */ + if (n_cores == 0) + ncores = 1; + else + ncores = n_cores; + + cache_size = bitfield32(reg[reg_to_use], 31, 16); + + if (cache_size == 0) { + /* Cache doesn't exist, set it as zero */ + info_p->cache_size[type] = 0; + info_p->cache_sharing[type] = 0; + info_p->cache_partitions[type] = 0; + return; + } + + switch (cache_level) { + case 1: + /* L1 uses only bits 31 to 24 so we should shift right 8 bits + * This is in KB units, and is reported per-core. + */ + cache_size = (cache_size >> 8) * 1024; + cache_sharing = 1; + break; + case 2: + /* L2 cache is in KB units, reported per-core */ + cache_size *= 1024; + cache_sharing = 1; + break; + case 3: + /* L3 is in 512 KB units. This is reported by CPU as total, and + * we export it as per-core. 
*/ + cache_size = cache_size * 512 * 1024 / ncores; + cache_sharing = ncores; + break; + default: + BUG("invalid cache level"); + return; /* not reached, silences optimizer */ + }; + + cache_linesize = bitfield32(reg[reg_to_use], 7, 0); + cache_partitions = bitfield32(reg[reg_to_use], 15, 8); // Needs review + cache_associativity = bitfield32(reg[reg_to_use], 23, 16); + + /* For L2/L3 caches, AMD uses an encoding for associativity. + * The formula is 2 ^ (assoc / 2) + */ + if (cache_level > 1) { + cache_associativity = 1ul << (cache_associativity / 2); + } + + info_p->cache_size[type] = cache_size; + info_p->cache_sharing[type] = cache_sharing; + info_p->cache_partitions[type] = cache_partitions; + + if (type == L2U) { + info_p->cpuid_cache_L2_associativity = cache_associativity; + info_p->cpuid_cache_size = cache_size; + } + + linesizes[type] = cache_linesize; + cache_sets = cache_size / (cache_associativity * cache_linesize * cache_partitions); + colors = ( cache_linesize * cache_sets ) >> 12; + + if ( colors > *geometry_colors ) + *geometry_colors = colors; +} +static void +cpuid_set_amd_cache_info( i386_cpu_info_t * info_p ) +{ + uint32_t linesizes[LCACHE_MAX]; + uint32_t reg[4] = {0, 0, 0, 0}; + + bzero( linesizes, sizeof(linesizes) ); + + /* AMD cpus don't support leaf-2 style cache info (it's annoying anyway). + * But apparently some kexts need this information (refer to the intel version below). + * TODO: Implement AMD cache info to intel leaf-2 format conversion. 
+ */ + + /* + * Get deterministic cache info using AMD's cpuid registers + */ + + /* First get number of cores in the processor */ + do_cpuid(0x80000008, reg); + info_p->cpuid_cores_per_package = bitfield32(reg[ecx], 7, 0) + 1; + info_p->cpuid_logical_per_package = info_p->cpuid_cores_per_package; + + /* Get detailed cache info */ + get_amd_cache_info(info_p, linesizes, L1I, &vm_cache_geometry_colors, info_p->cpuid_cores_per_package); + get_amd_cache_info(info_p, linesizes, L1D, &vm_cache_geometry_colors, info_p->cpuid_cores_per_package); + get_amd_cache_info(info_p, linesizes, L2U, &vm_cache_geometry_colors, info_p->cpuid_cores_per_package); + get_amd_cache_info(info_p, linesizes, L3U, &vm_cache_geometry_colors, info_p->cpuid_cores_per_package); + + /* + * What linesize to publish? We use the L2 linesize if any, + * else the L1D. + */ + if ( linesizes[L2U] ) + info_p->cache_linesize = linesizes[L2U]; + else if (linesizes[L1D]) + info_p->cache_linesize = linesizes[L1D]; + else panic("no linesize"); /* AMD machines should always report a cacheline */ +} /* this function is Intel-specific */ static void cpuid_set_cache_info( i386_cpu_info_t * info_p ) @@ -382,15 +555,49 @@ * If deterministic cache parameters are not available, use * something else */ - if (info_p->cpuid_cores_per_package == 0) { + if (!cpuid_deterministic_supported) { info_p->cpuid_cores_per_package = 1; + /* mercurysquad: iterate over the predefined list of non-det cache info */ + boolean_t foundDescriptor; + intel_nd_cache_info foundInfo; + cache_type_t type = Lnone; + uint32_t colors; + uint32_t cache_sets; + + for (i = 0; i < 64; i++) { + foundDescriptor = FALSE; + for(j = 0; j < 43; j++) { + if (nonDet_CacheInfo[j].encoding == info_p->cache_info[i]) { + foundInfo = nonDet_CacheInfo[j]; + type = foundInfo.type; + foundDescriptor = TRUE; + break; + } + } + if (!foundDescriptor) continue; + + info_p->cache_size[type] = foundInfo.totalsize; + info_p->cache_sharing[type] = 1; + 
info_p->cache_partitions[type] = foundInfo.partitions; + linesizes[type] = foundInfo.linesize; + cache_sets = foundInfo.totalsize / + (foundInfo.associativity * + foundInfo.linesize * + foundInfo.partitions); + + colors = ( foundInfo.linesize * cache_sets ) >> 12; + + if ( colors > vm_cache_geometry_colors ) + vm_cache_geometry_colors = colors; + } + /* Apple's "something else" -- - /* cpuid define in 1024 quantities */ info_p->cache_size[L2U] = info_p->cpuid_cache_size * 1024; info_p->cache_sharing[L2U] = 1; info_p->cache_partitions[L2U] = 1; linesizes[L2U] = info_p->cpuid_cache_linesize; + */ } /* @@ -602,7 +809,11 @@ static uint32_t cpuid_set_cpufamily(i386_cpu_info_t *info_p) { - uint32_t cpufamily = CPUFAMILY_UNKNOWN; +#ifdef __x86_64__ + uint32_t cpufamily = CPUFAMILY_INTEL_MEROM; +#else + uint32_t cpufamily = CPUFAMILY_INTEL_YONAH; +#endif switch (info_p->cpuid_family) { case 6: @@ -619,6 +830,7 @@ case 23: cpufamily = CPUFAMILY_INTEL_PENRYN; break; + /*case CPUID_MODEL_GULFTOWN:*/ case CPUID_MODEL_NEHALEM: case CPUID_MODEL_FIELDS: case CPUID_MODEL_DALES: @@ -642,22 +854,27 @@ cpuid_set_info(void) { i386_cpu_info_t *info_p = &cpuid_cpu_info; - + uint32_t dummyVar; bzero((void *)info_p, sizeof(cpuid_cpu_info)); cpuid_set_generic_info(info_p); - /* verify we are running on a supported CPU */ - if ((strncmp(CPUID_VID_INTEL, info_p->cpuid_vendor, - min(strlen(CPUID_STRING_UNKNOWN) + 1, - sizeof(info_p->cpuid_vendor)))) || - (cpuid_set_cpufamily(info_p) == CPUFAMILY_UNKNOWN)) - panic("Unsupported CPU"); + /* mercurysquad: removed the supported CPU check, and add routing for AMD cpus */ + /* AnV - This sets also the cpufamily in the info_p struct, readded... 
*/ + cpuid_set_cpufamily(info_p); info_p->cpuid_cpu_type = CPU_TYPE_X86; info_p->cpuid_cpu_subtype = CPU_SUBTYPE_X86_ARCH1; - cpuid_set_cache_info(&cpuid_cpu_info); + if (IsAmdCPU() || PE_parse_boot_argn("-amd", &dummyVar, sizeof (dummyVar))) { + ForceAmdCpu = TRUE; // force from now so we dont have to do cpuid each time + cpuid_set_amd_cache_info(&cpuid_cpu_info); + /* The following is a non-ideal solution but seems to be required */ + if (PE_parse_boot_argn("-emulateintel", &dummyVar, sizeof (dummyVar))) + bcopy(CPUID_VID_INTEL, cpuid_cpu_info.cpuid_vendor, sizeof(CPUID_VID_INTEL)); + } else + cpuid_set_cache_info(&cpuid_cpu_info); + /* * Find the number of enabled cores and threads diff -Naur xnu-1504.9.26.orig/osfmk/i386/cpuid.h xnu-1504.9.26/osfmk/i386/cpuid.h --- xnu-1504.9.26.orig/osfmk/i386/cpuid.h 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/cpuid.h 2011-01-09 16:00:20.000000000 -0500 @@ -140,7 +140,7 @@ #define CPUID_MODEL_DALES_32NM 37 /* Clarkdale, Arrandale */ #define CPUID_MODEL_WESTMERE 44 /* Gulftown, Westmere-EP, Westmere-WS */ #define CPUID_MODEL_WESTMERE_EX 47 - +#define CPUID_MODEL_SANDYBRIDGE 42 #ifndef ASSEMBLER #include #include @@ -322,6 +322,9 @@ extern void cpuid_set_info(void); +extern boolean_t IsAmdCPU(void); +extern boolean_t IsIntelCPU(void); + #ifdef __cplusplus } #endif @@ -329,4 +332,13 @@ #endif /* ASSEMBLER */ #endif /* __APPLE_API_PRIVATE */ + +/* kaitek: the following definitions are needed by tsc.c and kern_mib.c */ +#define CPU_FAMILY_PENTIUM_M (0x6) +#define CPU_FAMILY_PENTIUM_4 (0xF) +#define CPU_FAMILY_AMD_PHENOM (0x10) +#define CPU_FAMILY_AMD_SHANGHAI (0x11) +#define CPU_FAMILY_I5 (0x1E) +#define CPU_FAMILY_I9 (0x2C) +#define CPU_FAMILY_SANDY (0x2A) #endif /* _MACHINE_CPUID_H_ */ diff -Naur xnu-1504.9.26.orig/osfmk/i386/cpuid_legacy.h xnu-1504.9.26/osfmk/i386/cpuid_legacy.h --- xnu-1504.9.26.orig/osfmk/i386/cpuid_legacy.h 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/cpuid_legacy.h 
2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,86 @@ +/* + * cpuid_legacy.h + * xnu + * + * Created by mercurysquad on 21/9/08. + * + */ + +/* This file is added to avoid polluting stock cpuid.h */ + +#ifndef _CPUID_LEGACY_H_ +#define _CPUID_LEGACY_H_ + +/* Declarations for extra stuff added to cpuid.c */ +void get_amd_cache_info( + i386_cpu_info_t* info_p, + uint32_t linesizes[], + cache_type_t type, + uint32_t* geometry_colors, + uint32_t ncores); + +/* Declarations for non deterministic cache info */ +typedef struct { + uint32_t encoding; + cache_type_t type; + uint32_t totalsize; + uint32_t associativity; + uint32_t linesize; + uint32_t partitions; +} intel_nd_cache_info; + +//#define KB (1024) +#define MB (1024*KB) + +/* For encoding information, refer to IA32 instruction set reference A-M, CPUID instruction */ +/* Wonder who at Intel came up with this mess */ +const intel_nd_cache_info nonDet_CacheInfo[43] = { +/* byte, type, size, assoc, linesize */ +{ 0x06, L1I, 8*KB, 4, 32, 1 }, +{ 0x08, L1I, 16*KB, 4, 32, 1 }, +{ 0x0A, L1D, 8*KB, 2, 32, 1 }, +{ 0x0C, L1D, 16*KB, 4, 32, 1 }, +{ 0x0E, L1D, 24*KB, 6, 64, 1 }, +{ 0x22, L3U, 512*KB, 4, 64, 2 }, +{ 0x23, L3U, 1*MB, 8, 64, 2 }, +{ 0x25, L3U, 2*MB, 8, 64, 2 }, +{ 0x29, L3U, 4*MB, 8, 64, 2 }, +{ 0x2C, L1D, 32*KB, 8, 64, 2 }, +{ 0x30, L1I, 32*KB, 8, 64, 1 }, +{ 0x41, L2U, 128*KB, 4, 32, 1 }, +{ 0x42, L2U, 256*KB, 4, 32, 1 }, +{ 0x43, L2U, 512*KB, 4, 32, 1 }, +{ 0x44, L2U, 1*MB, 4, 32, 1 }, +{ 0x45, L2U, 2*MB, 4, 32, 1 }, +{ 0x46, L3U, 4*MB, 4, 64, 1 }, +{ 0x47, L3U, 8*MB, 8, 64, 1 }, +{ 0x48, L2U, 3*MB, 12, 64, 1 }, +{ 0x49, L2U, 4*MB, 16, 64, 1 }, // for Xeons family Fh model 6h it's L3U but we dont care +{ 0x4A, L3U, 6*MB, 12, 64, 1 }, +{ 0x4B, L3U, 8*MB, 16, 64, 1 }, +{ 0x4C, L3U, 12*MB, 12, 64, 1 }, +{ 0x4D, L3U, 16*MB, 16, 64, 1 }, +{ 0x4E, L2U, 6*MB, 24, 64, 1 }, +{ 0x60, L1D, 16*KB, 8, 64, 1 }, +{ 0x66, L1D, 8*KB, 4, 64, 1 }, +{ 0x67, L1D, 16*KB, 4, 64, 1 }, +{ 0x68, L1D, 32*KB, 4, 64, 1 }, +{ 0x78, L2U, 
1*MB, 4, 64, 1 }, +{ 0x79, L2U, 128*KB, 8, 64, 2 }, +{ 0x7A, L2U, 256*KB, 8, 64, 2 }, +{ 0x7B, L2U, 512*KB, 8, 64, 2 }, +{ 0x7C, L2U, 1*MB, 8, 64, 2 }, +{ 0x7D, L2U, 2*MB, 8, 64, 1 }, +{ 0x7F, L2U, 512*KB, 2, 64, 1 }, +{ 0x80, L2U, 512*KB, 8, 64, 1 }, +{ 0x82, L2U, 256*KB, 8, 32, 1 }, +{ 0x83, L2U, 512*KB, 8, 32, 1 }, +{ 0x84, L2U, 1*MB, 8, 32, 1 }, +{ 0x85, L2U, 2*MB, 8, 32, 1 }, +{ 0x86, L2U, 512*KB, 4, 64, 1 }, +{ 0x87, L2U, 1*MB, 8, 64, 1 } +}; + +#define CPUID_MODEL_GULFTOWN 44 + +#endif // _CPUID_LEGACY_H_ diff -Naur xnu-1504.9.26.orig/osfmk/i386/i386_init.c xnu-1504.9.26/osfmk/i386/i386_init.c --- xnu-1504.9.26.orig/osfmk/i386/i386_init.c 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/i386_init.c 2011-01-09 16:00:20.000000000 -0500 @@ -113,6 +113,8 @@ #include #endif /* MACH_KDB */ +#include + int debug_task; static boot_args *kernelBootArgs; @@ -383,6 +385,68 @@ #endif } +/* kaitek: Code for parsing on-the-fly opcode patcher options */ +static void parse_patcher_opts(boolean_t IA32e) +{ + uint32_t boot_arg; + + /* AnV - by default it will only work if force patching is enabled atm + * So if there is no boot_arg, set it by default to 2... + */ + if (PE_parse_boot_argn("patcher_opts", &boot_arg, sizeof(boot_arg))) { + KERN_patcherOpts = boot_arg; + return; + } else if (PE_parse_boot_argn("patcher", &boot_arg, sizeof(boot_arg))) + KERN_patcherOpts = boot_arg; + + if (IsAmdCPU()) + KERN_patcherOpts |= OPT_PATCHER_CPUID; +#ifdef EXTENDED_PATCHER + if (!(cpuid_features() & CPUID_FEATURE_SSE3)) + KERN_patcherOpts |= OPT_PATCHER_LDDQU; +#endif + + /* kaitek: + * sysenter is used for 32-bit software, while syscall is used for 64-bit + * software. on amd, the situation is that 32-bit software running in legacy + * mode works, and 64-bit software running in long mode also works. 32-bit + * software running in compatibility mode, however, makes use of sysenter + * which is not available under either long sub-mode on amd processors. 
+ * + * +--------------------------+--------------------------+ + * | amd64 | intel64 | + * +-------------------+--------+-----------------+--------+-----------------+ + * | mode | | long | | ia-32e | + * +-------------------+ legacy +--------+--------+ legacy +--------+--------+ + * | sub-mode | | 64-bit | compat | | 64-bit | compat | + * +-------------------+--------+--------+--------+--------+--------+--------+ + * | syscall/sysret | yes | yes | yes | no | yes | no | + * | sysenter/sysexit | yes | no | no | yes | yes | yes | + * +-------------------+--------+--------+--------+--------+--------+--------+ + * + */ + if (IA32e && IsAmdCPU()) + KERN_patcherOpts |= OPT_PATCHER_SYSENTER; +} + +#define BOOL_TO_STRING(expr) ((expr) ? "true" : "false") + +void dump_patcher_opts() +{ + printf("dumping patcher options:\n\tverbose: %s\tforce patching: %s\n" + "\tcpuid: %s\tsysenter_trap: %s\n", + BOOL_TO_STRING(KERN_patcherOpts & OPT_PATCHER_DEBUG), + BOOL_TO_STRING(KERN_patcherOpts & OPT_PATCHER_FORCE), + BOOL_TO_STRING(KERN_patcherOpts & OPT_PATCHER_CPUID), + BOOL_TO_STRING(KERN_patcherOpts & OPT_PATCHER_SYSENTER)); +#ifdef EXTENDED_PATCHER + printf("\tlddqu: %s\tfisttp: %s\n", + BOOL_TO_STRING(KERN_patcherOpts & OPT_PATCHER_LDDQU), + BOOL_TO_STRING(KERN_patcherOpts & OPT_PATCHER_FISTTP)); +#endif + delay_for_interval(3, NSEC_PER_SEC); +} + /* * Cpu initialization. Running virtual, but without MACH VM * set up. @@ -476,9 +540,14 @@ * At this point we check whether we are a 64-bit processor * and that we're not restricted to legacy mode, 32-bit operation. 
*/ + uint32_t boot_arg; if (cpuid_extfeatures() & CPUID_EXTFEATURE_EM64T) { kprintf("EM64T supported"); - if (PE_parse_boot_argn("-legacy", &legacy_mode, sizeof (legacy_mode))) { + if (PE_parse_boot_argn("-legacy", &legacy_mode, sizeof (legacy_mode)) || + /* mercurysquad: SSE2 processor can't run in 64bit */ + !(cpuid_features() & CPUID_FEATURE_SSE3) || + /* Also force 32bit until bcopy is fixed */ + IsAmdCPU() || IsIntelCPU()) { kprintf(" but legacy mode forced\n"); IA32e = FALSE; } else { @@ -486,11 +555,17 @@ } } else IA32e = FALSE; + if (PE_parse_boot_argn("-force64", &boot_arg, sizeof(boot_arg))) { + kprintf("EM64T forced\n"); + IA32e = TRUE; + } #endif if (!(cpuid_extfeatures() & CPUID_EXTFEATURE_XD)) nx_enabled = 0; + parse_patcher_opts(IA32e); + /* Obtain "lcks" options:this currently controls lock statistics */ if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts))) LcksOpts = 0; diff -Naur xnu-1504.9.26.orig/osfmk/i386/idt.s xnu-1504.9.26/osfmk/i386/idt.s --- xnu-1504.9.26.orig/osfmk/i386/idt.s 2011-01-06 11:45:38.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/idt.s 2011-01-09 16:00:20.000000000 -0500 @@ -456,8 +456,13 @@ INTERRUPT(0xf8) INTERRUPT(0xf9) INTERRUPT(0xfa) -INTERRUPT(0xfb) -INTERRUPT(0xfc) +/* semthex / mercurysquad: + * This next interrupt (0xFB) is used for fake CPUID on AMD or other CPUs. + * The binary patcher replaces all cpuid instructions in a Mach-O binary with int 0xfb, + * which is then caught by this handler which sends Intel cpuid instead of AMD or whatever. + */ +EXCEP_SPC_USR(0xfb, t_fake_cpuid) +EXCEP_SPC_USR(0xfc, t_fake_sysenter) INTERRUPT(0xfd) INTERRUPT(0xfe) EXCEPTION(0xff,t_preempt) @@ -470,6 +475,31 @@ .text +/* semthex / mercurysquad: +* The following is the int 0xfb handler which will return a fake cpuid string +*/ +Entry(t_fake_cpuid) + pushf // Must not modify eflags! 
+ testl %eax, %eax // If eax == 0, we are getting cpuid string, so compare and set ZF for later + cpuid // Get cpuid; we'll patch it later based on the above test + // Note that cpuid does not affect any flags, so it's ok to compare + // first, do cpuid, and then do the conditional jump + jnz 2f // eax was not zero: we are getting feature bits etc, so skip patching +1: // otherwise, eax was zero so patch the cpuid vendor string with GenuineIntel + movl $0x756e6547, %ebx // "Genu" + movl $0x49656e69, %edx // "ineI" + movl $0x6c65746e, %ecx // "ntel" +2: + popf // restore eflags + iret // Done. Return from interrupt + +Entry(t_fake_sysenter) + pushl %eax /* save system call number */ + pushl $0 /* clear trap number slot */ + pusha /* save the general registers */ + movl $EXT(lo_sysenter),%ebx + jmp enter_lohandler + /******************************************************************************************************* * diff -Naur xnu-1504.9.26.orig/osfmk/i386/idt64.s xnu-1504.9.26/osfmk/i386/idt64.s --- xnu-1504.9.26.orig/osfmk/i386/idt64.s 2011-01-06 11:45:38.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/idt64.s 2011-01-09 16:00:20.000000000 -0500 @@ -425,14 +425,39 @@ INTERRUPT64(0xf8) INTERRUPT64(0xf9) INTERRUPT64(0xfa) -INTERRUPT64(0xfb) -INTERRUPT64(0xfc) +EXCEP64_SPC_USR(0xfb, t64_fake_cpuid) +EXCEP64_SPC_USR(0xfc, t64_fake_sysenter) INTERRUPT64(0xfd) INTERRUPT64(0xfe) EXCEPTION64(0xff,t64_preempt) .text + + +/* kaitek: for an in-depth description of the int 0xfb handler, see t_fake_cpuid in idt.s */ + +Entry(t64_fake_cpuid) + pushf + testl %eax, %eax // cpuid only checks the lower dword of rax + cpuid + jnz 2f +1: + movq $0x756e6547, %rbx // results, however, are zero-extended to qword size + movq $0x49656e69, %rdx + movq $0x6c65746e, %rcx +2: + popf + iretq // operand size is *very* important here + +Entry(t64_fake_sysenter) + swapgs /* switch to kernel gs (cpu_data) */ + push %rax /* save system call number */ + push $(UNIX_INT) /* only used for statistics 
*/ + movl $EXT(lo_sysenter),4(%rsp) + jmp L_32bit_enter_check + + /* * * Trap/interrupt entry points. diff -Naur xnu-1504.9.26.orig/osfmk/i386/lapic.c xnu-1504.9.26/osfmk/i386/lapic.c --- xnu-1504.9.26.orig/osfmk/i386/lapic.c 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/lapic.c 2011-01-09 16:00:20.000000000 -0500 @@ -213,7 +213,8 @@ lapic_id = (unsigned long)(lapic_start + LAPIC_ID); if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) { - panic("Local APIC version 0x%x, 0x14 or more expected\n", +// qoopz: panics on some amds + printf("Local APIC version 0x%x, 0x14 or more expected\n", (LAPIC_READ(VERSION)&LAPIC_VERSION_MASK)); } @@ -471,6 +472,11 @@ value |= LAPIC_LVT_DM_EXTINT; LAPIC_WRITE(LVT_LINT0, value); } + else + LAPIC_WRITE(LVT_LINT0, LAPIC_LVT_DM_EXTINT | LAPIC_LVT_MASKED); + + /* NMI: ummasked, off course */ + LAPIC_WRITE(LVT_LINT1, LAPIC_LVT_DM_NMI); /* Timer: unmasked, one-shot */ LAPIC_WRITE(LVT_TIMER, LAPIC_VECTOR(TIMER)); @@ -644,12 +650,14 @@ esr = lapic_esr_read(); lapic_dump(); - if ((debug_boot_arg && (lapic_dont_panic == FALSE)) || - cpu_number() != master_cpu) { + /* apocolipse LAPIC fix */ + if (debug_boot_arg && (lapic_dont_panic == FALSE)) + { + panic("Local APIC error, ESR: %d\n", esr); } - if (cpu_number() == master_cpu) { + if (TRUE) { uint64_t abstime = mach_absolute_time(); if ((abstime - lapic_last_master_error) < lapic_error_time_threshold) { if (lapic_master_error_count++ > lapic_error_count_threshold) { @@ -662,7 +670,7 @@ lapic_last_master_error = abstime; lapic_master_error_count = 0; } - printf("Local APIC error on master CPU, ESR: %d, error count this run: %d\n", esr, lapic_master_error_count); + printf("Local APIC error on CPU%d, ESR: %d, error count this run: %d\n",cpu_number(), esr, lapic_master_error_count); } _lapic_end_of_interrupt(); diff -Naur xnu-1504.9.26.orig/osfmk/i386/patcher_opts.h xnu-1504.9.26/osfmk/i386/patcher_opts.h --- xnu-1504.9.26.orig/osfmk/i386/patcher_opts.h 1969-12-31 
19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/patcher_opts.h 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,23 @@ +#ifndef _PATCHER_OPTS_H +#define _PATCHER_OPTS_H + +/* kaitek: patcher_opts sysctl key and boot argument */ + +extern int KERN_patcherOpts; + +#define OPT_PATCHER_DEBUG (1 << 0) +#define OPT_PATCHER_FORCE (1 << 1) + +#define OPT_PATCHER_CPUID (1 << 4) +#define OPT_PATCHER_SYSENTER (1 << 5) +#ifdef EXTENDED_PATCHER +# define OPT_PATCHER_LDDQU (1 << 6) +# define OPT_PATCHER_FISTTP (1 << 7) +# define OPT_PATCHER_ALL (OPT_PATCHER_CPUID|OPT_PATCHER_SYSENTER|OPT_PATCHER_LDDQU|OPT_PATCHER_FISTTP) +#else +# define OPT_PATCHER_ALL (OPT_PATCHER_CPUID|OPT_PATCHER_SYSENTER) +#endif + +void dump_patcher_opts(void); + +#endif diff -Naur xnu-1504.9.26.orig/osfmk/i386/pmCPU.c xnu-1504.9.26/osfmk/i386/pmCPU.c --- xnu-1504.9.26.orig/osfmk/i386/pmCPU.c 2011-01-06 11:45:33.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/pmCPU.c 2011-01-09 16:00:20.000000000 -0500 @@ -661,7 +661,14 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs, pmCallBacks_t *callbacks) { - if (callbacks != NULL && version == PM_DISPATCH_VERSION) { + /* AnV Software: Set it anyway and don't panic please... + This causes issues for SleepEnabler.kext... + Warn about version mismatch though... */ + + if (version != PM_DISPATCH_VERSION) /* warn only when the versions actually differ */ + printf("Warning: Version mis-match between Kernel and CPU PM (0x%8X != 0x%8X)\n", version, PM_DISPATCH_VERSION); + + if (callbacks != NULL) { callbacks->setRTCPop = setPop; callbacks->resyncDeadlines = pmReSyncDeadlines; callbacks->initComplete = pmInitComplete; @@ -684,7 +691,9 @@ callbacks->RTCClockAdjust = rtc_clock_adjust; callbacks->topoParms = &topoParms; } else { - panic("Version mis-match between Kernel and CPU PM"); + // qoopz: diff kernel crashes between versions, need cpupm disabler. + // AnV - Just warn... 
+ printf("Version mis-match between Kernel and CPU PM"); } if (cpuFuncs != NULL) { diff -Naur xnu-1504.9.26.orig/osfmk/i386/pm_timer.h xnu-1504.9.26/osfmk/i386/pm_timer.h --- xnu-1504.9.26.orig/osfmk/i386/pm_timer.h 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/pm_timer.h 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,18 @@ +/* + * (c) 2006 Arekhta Dmitri (DaemonES@gmail.com) + * Provided under GPL 2 + */ + +#ifndef _PM_TIMER_H_ +#define _PM_TIMER_H_ + +extern char init_pmt(void); +extern void disable_pmt(void); +extern uint32_t read_pmt(void); +extern uint32_t pmt_get_diff(uint32_t begTicks, uint32_t endTicks); + +extern uint32_t pmt_freq; +extern uint32_t pmt_mask; +extern char acpi_enabled; + +#endif diff -Naur xnu-1504.9.26.orig/osfmk/i386/rtclock.c xnu-1504.9.26/osfmk/i386/rtclock.c --- xnu-1504.9.26.orig/osfmk/i386/rtclock.c 2011-01-06 11:45:34.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/rtclock.c 2011-01-09 16:00:20.000000000 -0500 @@ -74,9 +74,11 @@ #include #define NSEC_PER_HZ (NSEC_PER_SEC / 100) /* nsec per tick */ - +#define CLKNUM 1193182 // Actually 1193181.666... #define UI_CPUFREQ_ROUNDING_FACTOR 10000000 +uint32_t rtclock_boot_frequency = 0; /* mercurysquad: needed for rtclock_stepped */ + int rtclock_config(void); int rtclock_init(void); @@ -93,6 +95,177 @@ rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0}; +#ifdef __i386__ +/* mercurysquad: Following code until the timeRDTSC() function comes from xnu-792 + * It will be used in tsc.c as a fallback option to initialize the tsc when bus ratio + * read from the cpu (or fsb from EFI) is unreliable. This method is unreliable on processors + * whose tsc varies depending on load, freq or temperature (e.g. Pentium M). Use with care. + */ +/* + * Enable or disable timer 2. + * Port 0x61 controls timer 2: + * bit 0 gates the clock, + * bit 1 gates output to speaker. 
+ */ +inline static void +enable_PIT2(void) +{ + asm volatile( + " inb $0x61,%%al \n\t" + " and $0xFC,%%al \n\t" + " or $1,%%al \n\t" + " outb %%al,$0x61 \n\t" + : : : "%al" ); +} + +inline static void +disable_PIT2(void) +{ + asm volatile( + " inb $0x61,%%al \n\t" + " and $0xFC,%%al \n\t" + " outb %%al,$0x61 \n\t" + : : : "%al" ); +} + +inline static void +set_PIT2(int value) +{ +/* + * First, tell the clock we are going to write 16 bits to the counter + * and enable one-shot mode (command 0xB8 to port 0x43) + * Then write the two bytes into the PIT2 clock register (port 0x42). + * Loop until the value is "realized" in the clock, + * this happens on the next tick. + */ + asm volatile( + " movb $0xB8,%%al \n\t" + " outb %%al,$0x43 \n\t" + " movb %%dl,%%al \n\t" + " outb %%al,$0x42 \n\t" + " movb %%dh,%%al \n\t" + " outb %%al,$0x42 \n" +"1: inb $0x42,%%al \n\t" + " inb $0x42,%%al \n\t" + " cmp %%al,%%dh \n\t" + " jne 1b" + : : "d"(value) : "%al"); +} + +inline static uint64_t +get_PIT2(unsigned int *value) +{ + register uint64_t result; +/* + * This routine first latches the time (command 0x80 to port 0x43), + * then gets the time stamp so we know how long the read will take later. + * Read (from port 0x42) and return the current value of the timer. 
+ */ +#ifdef __i386__ + asm volatile( + " xorl %%ecx,%%ecx \n\t" + " movb $0x80,%%al \n\t" + " outb %%al,$0x43 \n\t" + " rdtsc \n\t" + " pushl %%eax \n\t" + " inb $0x42,%%al \n\t" + " movb %%al,%%cl \n\t" + " inb $0x42,%%al \n\t" + " movb %%al,%%ch \n\t" + " popl %%eax " + : "=A"(result), "=c"(*value)); +#else /* __x86_64__ */ + asm volatile( + " xorq %%rcx,%%rcx \n\t" + " movb $0x80,%%al \n\t" + " outb %%al,$0x43 \n\t" + " rdtsc \n\t" + " pushq %%rax \n\t" + " inb $0x42,%%al \n\t" + " movb %%al,%%cl \n\t" + " inb $0x42,%%al \n\t" + " movb %%al,%%ch \n\t" + " popq %%rax " + : "=A"(result), "=c"(*value)); +#endif + + return result; +} + +/* + * timeRDTSC() + * This routine sets up PIT counter 2 to count down 1/20 of a second. + * It pauses until the value is latched in the counter + * and then reads the time stamp counter to return to the caller. + */ +uint64_t timeRDTSC(void) +{ + int attempts = 0; + uint64_t latchTime; + uint64_t saveTime,intermediate; + unsigned int timerValue, lastValue; + boolean_t int_enabled; + /* + * Table of correction factors to account for + * - timer counter quantization errors, and + * - undercounts 0..5 + */ +#define SAMPLE_CLKS_EXACT (((double) CLKNUM) / 20.0) +#define SAMPLE_CLKS_INT ((int) CLKNUM / 20) +#define SAMPLE_NSECS (2000000000LL) +#define SAMPLE_MULTIPLIER (((double)SAMPLE_NSECS)*SAMPLE_CLKS_EXACT) +#define ROUND64(x) ((uint64_t)((x) + 0.5)) + uint64_t scale[6] = { + ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-0)), + ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-1)), + ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-2)), + ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-3)), + ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-4)), + ROUND64(SAMPLE_MULTIPLIER/(double)(SAMPLE_CLKS_INT-5)) + }; + + int_enabled = ml_set_interrupts_enabled(FALSE); + +restart: + if (attempts >= 9) // increase to up to 9 attempts. + // This will flash-reboot. TODO: Use tscPanic instead. 
+ panic("Timestamp counter calibation failed with %d attempts\n", attempts); + attempts++; + enable_PIT2(); // turn on PIT2 + set_PIT2(0); // reset timer 2 to be zero + latchTime = rdtsc64(); // get the time stamp to time + latchTime = get_PIT2(&timerValue) - latchTime; // time how long this takes + set_PIT2(SAMPLE_CLKS_INT); // set up the timer for (almost) 1/20th a second + saveTime = rdtsc64(); // now time how long a 20th a second is... + get_PIT2(&lastValue); + get_PIT2(&lastValue); // read twice, first value may be unreliable + do { + intermediate = get_PIT2(&timerValue); + if (timerValue > lastValue) { + // Timer wrapped + set_PIT2(0); + disable_PIT2(); + goto restart; + } + lastValue = timerValue; + } while (timerValue > 5); + kprintf("timerValue %d\n",timerValue); + kprintf("intermediate 0x%016llx\n",intermediate); + kprintf("saveTime 0x%016llx\n",saveTime); + + intermediate -= saveTime; // raw count for about 1/20 second + intermediate *= scale[timerValue]; // rescale measured time spent + intermediate /= SAMPLE_NSECS; // so its exactly 1/20 a second + intermediate += latchTime; // add on our save fudge + + set_PIT2(0); // reset timer 2 to be zero + disable_PIT2(); // turn off PIT 2 + + ml_set_interrupts_enabled(int_enabled); + return intermediate; +} +#endif + /* * tsc_to_nanoseconds: * @@ -309,8 +482,8 @@ assert(!ml_get_interrupts_enabled()); tsc = rdtsc64(); - oldnsecs = rntp->ns_base + _tsc_to_nanoseconds(tsc - rntp->tsc_base); - newnsecs = base + _tsc_to_nanoseconds(tsc - tsc_base); + oldnsecs = rntp->ns_base + _tsc_to_nanoseconds((tsc - rntp->tsc_base) * RTCLOCK_SCALE_UP_BY); + newnsecs = base + _tsc_to_nanoseconds((tsc - tsc_base) * RTCLOCK_SCALE_UP_BY); /* * Only update the base values if time using the new base values @@ -345,14 +518,49 @@ rtc_clock_stepping(__unused uint32_t new_frequency, __unused uint32_t old_frequency) { - panic("rtc_clock_stepping unsupported"); + /* mercurysquad: Re-implemented 2009-05-24 + * note that after stepping, we 
restore the ns base to the saved value + * but there might have been a finite interval during which the stepping + * took place, which will be unaccounted for. FIXME: use another timer + * source to correct for this */ + + rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; + boolean_t istate; + + istate = ml_set_interrupts_enabled(FALSE); + + rntp->ns_base = rtc_nanotime_read(); // save current ns base + rntp->tsc_base = rdtsc64(); + + ml_set_interrupts_enabled(istate); + return; } void -rtc_clock_stepped(__unused uint32_t new_frequency, - __unused uint32_t old_frequency) +rtc_clock_stepped(uint32_t new_frequency, + uint32_t old_frequency) { - panic("rtc_clock_stepped unsupported"); + /* mercurysquad: Re-implement 2009-05-24 + * Originally written by Turbo to use 'slow' clock calculation method + * Updated to use scaled-tsc fast calculations */ + + rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; + uint64_t cycles, ns_base; + boolean_t istate; + + if (rtclock_boot_frequency == 0) /* first step since boot time */ + rtclock_boot_frequency = old_frequency; + + cycles = (new_frequency * tscFreq / rtclock_boot_frequency); + ns_base = rntp->ns_base; + + istate = ml_set_interrupts_enabled(FALSE); + + rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / (cycles * RTCLOCK_SCALE_UP_BY)); + rntp->shift = 32; // just to be safe, though this was already set at bootup + rtc_nanotime_init(ns_base); + + ml_set_interrupts_enabled(istate); } /* @@ -431,7 +639,12 @@ rtc_set_timescale(uint64_t cycles) { rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime; - rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / cycles); + /* mercurysquad: we will scale up the tscFreq so that it will + * go above 1 GHz even if the actual tsc freq is smaller. 
Then + * while converting tsc -> nanosec, we will scale the tsc read + * from the processor also, so it correspond to our scaled tsc freq + */ + rntp->scale = (uint32_t)(((uint64_t)NSEC_PER_SEC << 32) / (cycles * RTCLOCK_SCALE_UP_BY)); if (cycles <= SLOW_TSC_THRESHOLD) rntp->shift = (uint32_t)cycles; @@ -639,6 +852,15 @@ uint64_t nanoseconds, uint64_t *result) { + /* mercurysquad: Magic value for IntelEnhancedSpeedStep.kext + * and any other kexts / utilities which might want to use this. + * Basically we have a 64bit int as in/out params, which is plenty + * of space to define magic args and their return values. + * Currently the only one defined is ~0. Next one could be ~1 and so on. + * TODO: Get rid of this and use a sysctl key instead */ + if (nanoseconds == ~(0ULL)) + *result = 2ULL; + else *result = nanoseconds; } diff -Naur xnu-1504.9.26.orig/osfmk/i386/rtclock.h xnu-1504.9.26/osfmk/i386/rtclock.h --- xnu-1504.9.26.orig/osfmk/i386/rtclock.h 2011-01-06 11:45:34.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/rtclock.h 2011-01-09 16:00:20.000000000 -0500 @@ -58,6 +58,7 @@ #endif struct cpu_data; +uint64_t timeRDTSC(void); /* mercurysquad: Used in tsc.c */ extern uint64_t tsc_rebase_abs_time; @@ -80,6 +81,9 @@ #endif #define SLOW_TSC_THRESHOLD 1000067800 /* TSC is too slow for regular nanotime() algorithm */ +#define RTCLOCK_SCALE_UP_BITS 4 /* mercurysquad: Refer to rtclock.c */ +#define RTCLOCK_SCALE_UP_BY (1 << RTCLOCK_SCALE_UP_BITS) +#define RTCLOCK_SCALE_UP_MASK (RTCLOCK_SCALE_UP_BY - 1) #if defined(__i386__) /* @@ -96,6 +100,13 @@ lfence ; \ subl RNT_TSC_BASE(%edi),%eax ; \ sbbl RNT_TSC_BASE+4(%edi),%edx /* tsc - tsc_base */ ; \ + shll $ RTCLOCK_SCALE_UP_BITS, %edx /* scale up */ ; \ + roll $ RTCLOCK_SCALE_UP_BITS, %eax ; \ + movl %eax,%ecx ; \ + andl $ RTCLOCK_SCALE_UP_MASK, %ecx ; \ + addl %ecx,%edx ; \ + notl %ecx ; \ + andl %ecx,%eax ; \ movl RNT_SCALE(%edi),%ecx /* * scale factor */ ; \ movl %edx,%ebx ; \ mull %ecx ; \ @@ -126,6 +137,8 @@ shlq $32,%rdx 
; \ orq %rdx,%rax /* %rax := tsc */ ; \ subq RNT_TSC_BASE(%rdi),%rax /* tsc - tsc_base */ ; \ + movq $ RTCLOCK_SCALE_UP_BY, %rcx /* scale up the tsc */ ; \ + mulq %rcx ; \ xorq %rcx,%rcx ; \ movl RNT_SCALE(%rdi),%ecx ; \ mulq %rcx /* delta * scale */ ; \ diff -Naur xnu-1504.9.26.orig/osfmk/i386/start.s xnu-1504.9.26/osfmk/i386/start.s --- xnu-1504.9.26.orig/osfmk/i386/start.s 2011-01-06 11:45:38.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/start.s 2011-01-09 16:00:20.000000000 -0500 @@ -174,6 +174,19 @@ POSTCODE(PSTART_ENTRY) +/* mercurysquad / turbo: + * Setup SSE3 emulation early on in the boot process by patching IDT if SSE3 is not detected + */ + movl EXT(sse3emu_size), %eax // Load size of SSE3 emulator blob into eax + testl %eax, %eax // Check if it is zero + jz 1f // Kernel was built without blob, so don't patch IDT + movl $1, %eax // Put 1 in eax so that cpuid returns extra features + cpuid // This puts some cpu feature bits in ecx + testl $(CPUID_FEATURE_SSE3), %ecx // Test whether SSE3 is present + jnz 1f // If present (ie. 
flag was 1), skip IDT patching + lea EXT(master_idt), %esi // Otherwise get address of master_idt in esi + movl $0xffff4000, 0x30(%esi,1) // Put sse3emu address in the proper offset from [esi] +1: lgdt EXT(gdtptr) /* load GDT */ mov $(KERNEL_DS),%ax /* set kernel data segment */ @@ -329,10 +342,18 @@ orl $(CR4_PAE),%eax movl %eax,%cr4 /* enable page size extensions */ + /* mercurysquad: check for NXE support and skip setting NXE if not supported */ + movl $0x80000001, %eax /* set eax to get feature bits */ + cpuid /* Get cpuid */ + test $(CPUID_EXTFEATURE_XD), %edx /* Test for NXE support */ + jz 1f /* Not supported, skip NXE */ + + /* Otherwise, set NXE bit */ movl $(MSR_IA32_EFER), %ecx /* MSR number in ecx */ rdmsr /* MSR value return in edx: eax */ orl $(MSR_IA32_EFER_NXE), %eax /* Set NXE bit in low 32-bits */ wrmsr /* Update Extended Feature Enable reg */ +1: movl %cr0, %eax orl $(CR0_PG|CR0_WP), %eax diff -Naur xnu-1504.9.26.orig/osfmk/i386/trap.c xnu-1504.9.26/osfmk/i386/trap.c --- xnu-1504.9.26.orig/osfmk/i386/trap.c 2011-01-06 11:45:34.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/trap.c 2011-01-09 16:00:20.000000000 -0500 @@ -813,6 +813,53 @@ thread->recover = 0; return; } + + /* kaitek: catch rdmsr/wrmsr with invalid msr values from kernel mode (thanks to + * kabyl for suggesting this) + * AnV: added 64 bit version */ + vm_offset_t addr; + boolean_t exists; + +#ifdef __LP64__ + addr = saved_state->isf.rip; +#else + addr = saved_state->eip; +#endif + exists = vm_map_check_protection(kernel_map, addr, addr + 2, VM_PROT_READ); + if (exists) { + uint16_t opcode; + + opcode = *(uint16_t *) addr; + if (opcode == 0x320f) { +#ifdef __LP64__ + printf("[MSR] detected invalid rdmsr(%08x) at 0x%016llx\n", + (unsigned int)saved_state->rcx, (unsigned long long)addr); /* MSR index is 32-bit; print the full 64-bit ip */ + saved_state->rdx = 0; + saved_state->rax = 0; +#else + printf("[MSR] detected invalid rdmsr(%08x) at 0x%08x\n", + saved_state->ecx, addr); + saved_state->edx = 0; + saved_state->eax = 0; +#endif + 
set_recovery_ip(saved_state, addr + 2); + return; + } else if (opcode == 0x300f) { +#ifdef __LP64__ + printf("[MSR] detected invalid wrmsr(%08x, %08x:%08x) at 0x%016llx\n", + (unsigned int)saved_state->rcx, (unsigned int)saved_state->rdx, (unsigned int)saved_state->rax, + (unsigned long long)addr); /* print the full 64-bit ip */ +#else + printf("[MSR] detected invalid wrmsr(%08x, %08x:%08x) at 0x%08x\n", + saved_state->ecx, saved_state->edx, saved_state->eax, + addr); +#endif + set_recovery_ip(saved_state, addr + 2); + return; + } + } else + printf("warning: invalid kernel ip, won't attempt to handle trap\n"); + /* * Unanticipated page-fault errors in kernel * should not happen. diff -Naur xnu-1504.9.26.orig/osfmk/i386/trap.h xnu-1504.9.26/osfmk/i386/trap.h --- xnu-1504.9.26.orig/osfmk/i386/trap.h 2011-01-06 11:45:34.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/trap.h 2011-01-09 16:00:20.000000000 -0500 @@ -84,6 +84,7 @@ #define T_SSE_FLOAT_ERROR 19 /* 20-126 */ #define T_DTRACE_RET 127 +#define T_FAKE_CPUID 251 /* The SYSENTER and SYSCALL trap numbers are software constructs. * These exceptions are dispatched directly to the system call handlers. 
diff -Naur xnu-1504.9.26.orig/osfmk/i386/tsc.c xnu-1504.9.26/osfmk/i386/tsc.c --- xnu-1504.9.26.orig/osfmk/i386/tsc.c 2011-01-06 11:45:34.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/tsc.c 2011-01-09 16:00:20.000000000 -0500 @@ -74,6 +74,7 @@ uint64_t tscGranularity = 0; uint64_t bus2tsc = 0; uint64_t busFreq = 0; +uint32_t kTscPanicOn = 0; uint32_t flex_ratio = 0; uint32_t flex_ratio_min = 0; uint32_t flex_ratio_max = 0; @@ -90,9 +91,31 @@ #define Tera (kilo * Giga) #define Peta (kilo * Tera) -#define CPU_FAMILY_PENTIUM_M (0x6) +/* mercurysquad: The following enum specifies one of the bus ratio calc paths to take */ +typedef enum { + BUSRATIO_BOOTFLAG, + BUSRATIO_ATHLON, + BUSRATIO_EFI, + BUSRATIO_PHENOM_SHANGHAI, + BUSRATIO_INTEL_MSR, + BUSRATIO_AUTODETECT, + BUSRATIO_PENTIUM4_MSR, // P4 model 2+ have an MSR too + BUSRATIO_TIMER +} busratio_path_t; + +static const char* busRatioPathNames[] = { + "Boot-time argument", + "AMD Athlon", + "Pentium 4 (via EFI)", + "AMD Phenom", + "Intel / Apple", + "Autodetect", + "Pentium 4 (via MSR)", + "Time the TSC" +}; static const char FSB_Frequency_prop[] = "FSBFrequency"; +static const char FSB_CPUFrequency_prop[] = "CPUFrequency"; /* * This routine extracts the bus frequency in Hz from the device tree. */ @@ -126,6 +149,66 @@ } return frequency; } +/* mercurysquad: + * This routine extracts the cpu frequency from the efi device tree + * The value should be set by a custom EFI bootloader (only needed on CPUs which + * don't report the bus ratio in one of the MSRs.) 
+ */ +static uint64_t +EFI_CPU_Frequency(void) +{ + uint64_t frequency = 0; + DTEntry entry; + void *value; + unsigned int size; + + if (DTLookupEntry(0, "/efi/platform", &entry) != kSuccess) { + kprintf("EFI_CPU_Frequency: didn't find /efi/platform\n"); + return 0; + } + if (DTGetProperty(entry,FSB_CPUFrequency_prop,&value,&size) != kSuccess) { + kprintf("EFI_CPU_Frequency: property %s not found\n", + FSB_CPUFrequency_prop); + return 0; + } + if (size == sizeof(uint64_t)) { + frequency = *(uint64_t *) value; + kprintf("EFI_CPU_Frequency: read %s value: %llu\n", + FSB_CPUFrequency_prop, frequency); + if (!(10*Mega < frequency && frequency < 50*Giga)) { + kprintf("EFI_CPU_Frequency: value out of range\n"); + frequency = 0; + } + } else { + kprintf("EFI_CPU_Frequency: unexpected size %d\n", size); + } + return frequency; +} + +/* + * Convert the cpu frequency info into a 'fake' MSR198h in Intel format + */ +static uint64_t +getFakeMSR(uint64_t frequency, uint64_t bFreq) { + uint64_t fakeMSR = 0ull; + uint64_t multi = 0; + + if (frequency == 0 || bFreq < 1000) /* bFreq / 1000 below must be non-zero */ + return 0; + + multi = frequency / (bFreq / 1000); // = multi*1000 + // divide by 1000, rounding up if it was x.75 or more + // Example: 12900 will get rounded to 13150/1000 = 13 + // but 12480 will be 12730/1000 = 12 + fakeMSR = (multi + 250) / 1000; + fakeMSR <<= 40; // push multiplier into bits 44 to 40 + + // If fractional part was within (0.25, 0.75), set N/2 + if ((multi % 1000 > 250) && (multi % 1000 < 750)) + fakeMSR |= (1ull << 46); + + return fakeMSR; +} /* * Initialize the various conversion factors needed by code referencing @@ -135,15 +218,29 @@ tsc_init(void) { uint64_t busFCvtInt = 0; - boolean_t N_by_2_bus_ratio = FALSE; - + uint64_t N_by_2_bus_ratio = FALSE; /* * Get the FSB frequency and conversion factors from EFI. 
*/ - busFreq = EFI_FSB_frequency(); - + if (!PE_parse_boot_argn("fsb", &busFreq, sizeof(busFreq))) { + // blackknight; added for corrected FSB detection for Phenoms and Shanghais + switch (cpuid_family()) { + case CPU_FAMILY_AMD_PHENOM: + case CPU_FAMILY_AMD_SHANGHAI: { + busFreq = 2 * EFI_FSB_frequency(); + if (busFreq == 0) + busFreq = 200 * Mega; + } + break; + default: + busFreq = EFI_FSB_frequency(); + } + } switch (cpuid_cpufamily()) { + case CPU_FAMILY_I9: + case CPU_FAMILY_I5: case CPUFAMILY_INTEL_WESTMERE: + case CPU_FAMILY_SANDY: case CPUFAMILY_INTEL_NEHALEM: { uint64_t cpu_mhz; uint64_t msr_flex_ratio; @@ -171,15 +268,187 @@ cpu_mhz = tscGranularity * BASE_NHM_CLOCK_SOURCE; - break; } + break; default: { - uint64_t prfsts; + /* + * mercurysquad: The bus ratio is crucial to setting the proper rtc increment. + * There are several methods so we first check any bootlfags. If none is specified, we choose + * based on the CPU type. + */ + uint64_t cpuFreq = 0, prfsts = 0, boot_arg = 0; + busratio_path_t busRatioPath = BUSRATIO_AUTODETECT; + + if (PE_parse_boot_argn("busratiopath", &boot_arg, sizeof(boot_arg))) + busRatioPath = (busratio_path_t) boot_arg; + else + busRatioPath = BUSRATIO_AUTODETECT; + + if (PE_parse_boot_argn("busratio", &tscGranularity, sizeof(tscGranularity))) + busRatioPath = BUSRATIO_BOOTFLAG; + + if (busRatioPath == BUSRATIO_AUTODETECT) { + /* This happens if no bootflag above was specified. 
+ * We'll choose based on CPU type */ + switch (cpuid_info()->cpuid_family) { + case CPU_FAMILY_PENTIUM_4: + /* This could be AMD Athlon or Intel P4 as both have family Fh */ + if (IsAmdCPU()) + busRatioPath = BUSRATIO_ATHLON; + else if (cpuid_info()->cpuid_model < 2 ) + /* These models don't implement proper MSR 198h or 2Ch */ + busRatioPath = BUSRATIO_TIMER; + else if (cpuid_info()->cpuid_model == 2) + /* This model has an MSR we can use */ + busRatioPath = BUSRATIO_PENTIUM4_MSR; + else /* 3 or higher */ + /* Other models should implement MSR 198h */ + busRatioPath = BUSRATIO_INTEL_MSR; + break; + case CPU_FAMILY_PENTIUM_M: + if (cpuid_info()->cpuid_model >= 0xD) + /* Pentium M or Core and above can use Apple method*/ + busRatioPath = BUSRATIO_INTEL_MSR; + else + /* Other Pentium class CPU, use safest option */ + busRatioPath = BUSRATIO_TIMER; + break; + case CPU_FAMILY_AMD_PHENOM: + case CPU_FAMILY_AMD_SHANGHAI: + /* These have almost the same method, with a minor difference */ + busRatioPath = BUSRATIO_PHENOM_SHANGHAI; + break; + default: + /* Fall back to safest method */ + busRatioPath = BUSRATIO_TIMER; + }; + } + + /* + * Now that we have elected a bus ratio path, we can proceed to calculate it. + */ + printf("rtclock_init: Taking bus ratio path %d (%s)\n", + busRatioPath, busRatioPathNames[busRatioPath]); + switch (busRatioPath) { + case BUSRATIO_BOOTFLAG: + /* tscGranularity was already set. However, check for N/2. N/2 is specified by + * giving a busratio of 10 times what it is (so last digit is 5). We set a cutoff + * of 30 before deciding it's n/2. 
TODO: find a better way */ + if (tscGranularity == 0) tscGranularity = 1; // avoid div by zero + N_by_2_bus_ratio = (tscGranularity > 30) && ((tscGranularity % 10) != 0); + if (N_by_2_bus_ratio) tscGranularity /= 10; /* Scale it back to normal */ + break; +#ifndef __i386__ //AnV: in case of x86_64 boot default for busratio timer to EFI value + case BUSRATIO_TIMER: +#endif + case BUSRATIO_EFI: + /* This uses the CPU frequency exported into EFI by the bootloader */ + cpuFreq = EFI_CPU_Frequency(); + prfsts = getFakeMSR(cpuFreq, busFreq); + tscGranularity = (uint32_t)bitfield(prfsts, 44, 40); + N_by_2_bus_ratio = prfsts & bit(46); + break; + case BUSRATIO_INTEL_MSR: + /* This will read the performance status MSR on intel systems (Apple method) */ + prfsts = rdmsr64(IA32_PERF_STS); + tscGranularity = (uint32_t)bitfield(prfsts, 44, 40); + N_by_2_bus_ratio= prfsts & bit(46); + break; + case BUSRATIO_ATHLON: + /* Athlons specify the bus ratio directly in an MSR using a simple formula */ + prfsts = rdmsr64(AMD_PERF_STS); + tscGranularity = 4 + bitfield(prfsts, 5, 1); + N_by_2_bus_ratio= prfsts & bit(0); /* FIXME: This is experimental! */ + break; + case BUSRATIO_PENTIUM4_MSR: + prfsts = rdmsr64(0x2C); // TODO: Add to header + tscGranularity = bitfield(prfsts, 31, 24); + break; + case BUSRATIO_PHENOM_SHANGHAI: + /* Phenoms and Shanghai processors have a different MSR to read the frequency + * multiplier and divisor, from which the cpu frequency can be calculated. + * This can then be used to construct the fake MSR. 
*/ + prfsts = rdmsr64(AMD_COFVID_STS); + printf("rtclock_init: Phenom MSR 0x%x returned: 0x%llx\n", AMD_COFVID_STS, prfsts); + uint64_t cpuFid = bitfield(prfsts, 5, 0); + uint64_t cpuDid = bitfield(prfsts, 8, 6); + /* The base for Fid could be either 8 or 16 depending on the cpu family */ + if (cpuid_info()->cpuid_family == CPU_FAMILY_AMD_PHENOM) + cpuFreq = (100 * Mega * (cpuFid + 0x10)) >> cpuDid; + else /* shanghai */ + cpuFreq = (100 * Mega * (cpuFid + 0x08)) >> cpuDid; + prfsts = getFakeMSR(cpuFreq, busFreq); + tscGranularity = (uint32_t)bitfield(prfsts, 44, 40); + N_by_2_bus_ratio = prfsts & bit(46); + break; +#ifdef __i386__ //qoopz: no get_PIT2 for x86_64 + case BUSRATIO_TIMER: + /* Fun fun fun. :-| */ + cpuFreq = timeRDTSC() * 20; + prfsts = getFakeMSR(cpuFreq, busFreq); + tscGranularity = (uint32_t)bitfield(prfsts, 44, 40); + N_by_2_bus_ratio = prfsts & bit(46); + break; +#endif + case BUSRATIO_AUTODETECT: + default: + kTscPanicOn = 1; /* see sanity check below */ + }; + +#ifdef __i386__ + /* Verify */ + if (!PE_parse_boot_argn("-notscverify", &boot_arg, sizeof(boot_arg))) { + uint64_t realCpuFreq = timeRDTSC() * 20; + cpuFreq = tscGranularity * busFreq; + if (N_by_2_bus_ratio) cpuFreq += (busFreq / 2); + uint64_t difference = 0; + if (realCpuFreq > cpuFreq) + difference = realCpuFreq - cpuFreq; + else + difference = cpuFreq - realCpuFreq; + + if (difference >= 4*Mega) { + // Shouldn't have more than 4MHz difference. This is about 2-3% of most FSBs. + // Fall back to using measured speed and correct the busFreq + // Note that the tscGran was read from CPU so should be correct. + // Only on Phenom the tscGran is calculated by dividing by busFreq. 
+ printf("TSC: Reported FSB: %4d.%04dMHz, ", (uint32_t)(busFreq / Mega), (uint32_t)(busFreq % Mega)); + if (N_by_2_bus_ratio) + busFreq = (realCpuFreq * 2) / (1 + 2*tscGranularity); + else + busFreq = realCpuFreq / tscGranularity; + printf("corrected FSB: %4d.%04dMHz\n", (uint32_t)(busFreq / Mega), (uint32_t)(busFreq % Mega)); + // Reset the busCvt factors + busFCvtt2n = ((1 * Giga) << 32) / busFreq; + busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n; + busFCvtInt = tmrCvt(1 * Peta, 0xFFFFFFFFFFFFFFFFULL / busFreq); + printf("TSC: Verification of clock speed failed. " + "Fallback correction was performed. Please upgrade bootloader.\n"); + } else { + printf("TSC: Verification of clock speed PASSED.\n"); + } + } +#else + printf("TSC: Verification of clock speed not available in x86_64.\n"); +#endif + + /* Do a sanity check of the granularity */ + if ((tscGranularity == 0) || + (tscGranularity > 30) || + (busFreq < 50*Mega) || + (busFreq > 1*Giga) || + /* The following is useful to force a panic to print diagnostic info */ + PE_parse_boot_argn("-tscpanic", &boot_arg, sizeof(boot_arg))) + { + printf("\n\n"); + printf(" >>> The real-time clock was not properly initialized on your system!\n"); + printf(" Contact Voodoo Software for further information.\n"); + kTscPanicOn = 1; /* Later when the console is initialized, this will show up, and we'll halt */ + if (tscGranularity == 0) tscGranularity = 1; /* to avoid divide-by-zero in the following few lines */ + } - prfsts = rdmsr64(IA32_PERF_STS); - tscGranularity = (uint32_t)bitfield(prfsts, 44, 40); - N_by_2_bus_ratio = (prfsts & bit(46)) != 0; } + break; } if (busFreq != 0) { @@ -187,7 +456,10 @@ busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n; busFCvtInt = tmrCvt(1 * Peta, 0xFFFFFFFFFFFFFFFFULL / busFreq); } else { - panic("tsc_init: EFI not supported!\n"); + /* Instead of panicking, set a default FSB frequency */ + busFreq = 133*Mega; + kprintf("rtclock_init: Setting fsb to %u MHz\n", (uint32_t) (busFreq/Mega)); + } 
kprintf(" BUS: Frequency = %6d.%04dMHz, " @@ -198,7 +470,7 @@ (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n, (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t, (uint32_t)(busFCvtInt >> 32), (uint32_t)busFCvtInt); - + /* * Get the TSC increment. The TSC is incremented by this * on every bus tick. Calculate the TSC conversion factors @@ -215,14 +487,12 @@ tscFreq = ((1 * Giga) << 32) / tscFCvtt2n; tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n; - kprintf(" TSC: Frequency = %6d.%04dMHz, " - "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n", - (uint32_t)(tscFreq / Mega), - (uint32_t)(tscFreq % Mega), - (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n, - (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t, - tscGranularity, N_by_2_bus_ratio ? " (N/2)" : ""); - + printf("TSC: Frequency = %6d.%04dMHz, FSB frequency = %4d.%04dMHz, bus ratio = %lld%s\n", + (uint32_t)(tscFreq / Mega), + (uint32_t)(tscFreq % Mega), + (uint32_t)(busFreq / Mega), (uint32_t) (busFreq % Mega), + tscGranularity, N_by_2_bus_ratio ? ".5" : ""); + /* * Calculate conversion from BUS to TSC */ diff -Naur xnu-1504.9.26.orig/osfmk/i386/tsc.h xnu-1504.9.26/osfmk/i386/tsc.h --- xnu-1504.9.26.orig/osfmk/i386/tsc.h 2011-01-06 11:45:34.000000000 -0500 +++ xnu-1504.9.26/osfmk/i386/tsc.h 2011-01-09 16:00:20.000000000 -0500 @@ -42,6 +42,11 @@ #define BASE_NHM_CLOCK_SOURCE 133333333ULL #define IA32_PERF_STS 0x198 +/* mercurysquad: MSRs for AMD support (getting bus ratio) */ +#define AMD_PERF_STS 0xC0010042 /* AMD's version of the MSR */ +#define AMD_PSTATE0_STS 0xC0010064 /* K10/phenom class AMD cpus */ +#define AMD_COFVID_STS 0xC0010071 /* This might be a better MSR for K10? 
*/ + extern uint64_t busFCvtt2n; extern uint64_t busFCvtn2t; @@ -51,6 +56,7 @@ extern uint64_t tscGranularity; extern uint64_t bus2tsc; extern uint64_t busFreq; +extern uint32_t kTscPanicOn; extern uint32_t flex_ratio; extern uint32_t flex_ratio_min; extern uint32_t flex_ratio_max; diff -Naur xnu-1504.9.26.orig/osfmk/kern/Makefile xnu-1504.9.26/osfmk/kern/Makefile --- xnu-1504.9.26.orig/osfmk/kern/Makefile 2011-01-06 11:45:39.000000000 -0500 +++ xnu-1504.9.26/osfmk/kern/Makefile 2011-01-09 16:00:20.000000000 -0500 @@ -37,6 +37,7 @@ startup.h \ task.h \ thread.h \ + voodoo_assert.h \ thread_call.h \ wait_queue.h \ zalloc.h diff -Naur xnu-1504.9.26.orig/osfmk/kern/debug.c xnu-1504.9.26/osfmk/kern/debug.c --- xnu-1504.9.26.orig/osfmk/kern/debug.c 2011-01-06 11:45:34.000000000 -0500 +++ xnu-1504.9.26/osfmk/kern/debug.c 2011-01-09 16:00:20.000000000 -0500 @@ -98,7 +98,7 @@ unsigned int disable_debug_output = TRUE; unsigned int systemLogDiags = FALSE; unsigned int panicDebugging = FALSE; -unsigned int logPanicDataToScreen = FALSE; +unsigned int logPanicDataToScreen = TRUE; int mach_assert = 1; @@ -277,6 +277,10 @@ panicstr = str; paniccpu = cpu_number(); panicwait = 1; + kdb_printf("Please contact someone with a photo of the\n" + "information printed below, along with a description of your\n" + "system configuration and what you were doing at the time that\n" + "the kernel panic occurred. 
We apologize for the inconvenience.\n\n"); PANIC_UNLOCK(); kdb_printf("panic(cpu %d caller 0x%lx): ", (unsigned) paniccpu, panic_caller); diff -Naur xnu-1504.9.26.orig/osfmk/kern/voodoo_assert.h xnu-1504.9.26/osfmk/kern/voodoo_assert.h --- xnu-1504.9.26.orig/osfmk/kern/voodoo_assert.h 1969-12-31 19:00:00.000000000 -0500 +++ xnu-1504.9.26/osfmk/kern/voodoo_assert.h 2011-01-09 16:00:20.000000000 -0500 @@ -0,0 +1,11 @@ +#ifndef _VOODOO_ASSERT_H +#define _VOODOO_ASSERT_H + +#include + +#define ASSERT(expr) do { if (!(expr)) panic("%s: failed assertion '%s'", \ + __FUNCTION__, #expr); } while (0) + +#define BUG(msg) panic("%s: %s\n", __FUNCTION__, #msg) + +#endif \ No newline at end of file diff -Naur xnu-1504.9.26.orig/osfmk/vm/vm_object.c xnu-1504.9.26/osfmk/vm/vm_object.c --- xnu-1504.9.26.orig/osfmk/vm/vm_object.c 2011-01-06 11:45:36.000000000 -0500 +++ xnu-1504.9.26/osfmk/vm/vm_object.c 2011-01-09 16:00:20.000000000 -0500 @@ -95,6 +95,8 @@ #include #include +#include + #if CONFIG_EMBEDDED #include #endif @@ -7436,3 +7438,107 @@ } return (lck_rw_try_lock_shared(&object->Lock)); } + +/* kaitek: the following functions are needed in this part of the kernel since we can't + * easily work with vm_objects in the bsd subsystem where the mach-o loader resides. 
*/ + +void dump_vm_object(vm_object_t object, const char *name, uint32_t flags) +{ + if (!object) { + printf("warning: tried to dump null vm_object\n"); + return; + } else if (!name) + name = "n/a"; + + if (flags & DUMP_VM_OBJ_REFCOUNT) + printf("vm_object %p (%s): ref_count = %d\n", object, name, object->ref_count); + if (flags & DUMP_VM_OBJ_PAGER) + printf("vm_object %p (%s): pager = %p, pager_control = %p\n", object, name, + object->pager, object->pager_control); + if (flags & DUMP_VM_OBJ_COPY) + printf("vm_object %p (%s): copy = %p, shadow = %p\n", object, name, + object->copy, object->shadow); +} + +vm_object_t exec_obj_create(memory_object_t pager, mach_vm_offset_t offset, + mach_vm_size_t size, __unused boolean_t is_verbose) +{ + kern_return_t ret; + vm_object_t object, copy = NULL; + boolean_t result = FALSE; + vm_object_offset_t new_offset; + boolean_t needs_copy; + + ASSERT(ip_kotype((ipc_port_t) pager) == IKOT_MEMORY_OBJECT); + + object = vm_object_enter(pager, size, FALSE, FALSE, FALSE); + if (!object) { + printf("vm_object_enter failed\n"); + goto failed; + } + + vm_object_lock(object); + while (!object->pager_ready) { + vm_object_wait(object, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); + vm_object_lock(object); + } + vm_object_unlock(object); + + if (object->copy_strategy != MEMORY_OBJECT_COPY_DELAY) { + printf("copy strategy (%d) not copy delay\n", object->copy_strategy); + goto failed; + } + ret = vm_object_copy_strategically(object, offset, size, &copy, &new_offset, + &needs_copy); + if (ret != KERN_SUCCESS) { + printf("vm_object_copy_strategically returned %d\n", ret); + goto failed; + } + ASSERT(new_offset == offset); + ASSERT(needs_copy); + + ret = vm_object_lock_request(object, offset, size, MEMORY_OBJECT_RETURN_NONE, + MEMORY_OBJECT_COPY_SYNC, VM_PROT_NO_CHANGE); + if (ret != KERN_SUCCESS) { + printf("vm_object_lock_request failed\n"); + goto failed; + } + + /* it seems counterintuitive to drop the reference taken by v_o_copy_strat, but + * this 
is what vm_map does in the copy-on-write path */ + vm_object_deallocate(object); + + result = TRUE; +failed: +#ifdef EXTRA_VERBOSE + if (is_verbose) { + /* todo: compact the output from dump_v_o a bit so that we don't have to + * make this optional for the sake of having readable verbose output */ + uint32_t flags = DUMP_VM_OBJ_REFCOUNT | DUMP_VM_OBJ_COPY; + if (copy) + dump_vm_object(copy, "eoc copy", flags); + dump_vm_object(object, "eoc parent", flags); + } +#endif + if (!result && copy) { + vm_object_deallocate(copy); + copy = NULL; + } + + return copy; +} + +void exec_obj_reference(vm_object_t object) +{ + vm_object_reference_shared(object); +} + +void exec_obj_deallocate(vm_object_t object) +{ + vm_object_deallocate(object); +} + +int exec_obj_get_refcount(vm_object_t object) +{ + return object->ref_count; +} diff -Naur xnu-1504.9.26.orig/osfmk/vm/vm_protos.h xnu-1504.9.26/osfmk/vm/vm_protos.h --- xnu-1504.9.26.orig/osfmk/vm/vm_protos.h 2011-01-06 11:45:36.000000000 -0500 +++ xnu-1504.9.26/osfmk/vm/vm_protos.h 2011-01-09 16:00:20.000000000 -0500 @@ -418,6 +418,22 @@ extern void vm_paging_map_init(void); +/* kaitek: see vm_object.c for a description of the following functions. 
*/ + +vm_object_t exec_obj_create(memory_object_t pager, mach_vm_offset_t offset, + mach_vm_size_t size, boolean_t is_verbose); +void exec_obj_reference(vm_object_t object); +void exec_obj_deallocate(vm_object_t object); +int exec_obj_get_refcount(vm_object_t object); + +#define DUMP_VM_OBJ_REFCOUNT (1 << 0) +#define DUMP_VM_OBJ_PAGER (1 << 1) +#define DUMP_VM_OBJ_COPY (1 << 2) + +#define DUMP_VM_OBJ_FLAGS ((1 << 3) - 1) + +void dump_vm_object(vm_object_t object, const char *name, uint32_t flags); + extern int macx_backing_store_compaction(int flags); extern unsigned int mach_vm_ctl_page_free_wanted(void); diff -Naur xnu-1504.9.26.orig/pexpert/i386/pe_kprintf.c xnu-1504.9.26/pexpert/i386/pe_kprintf.c --- xnu-1504.9.26.orig/pexpert/i386/pe_kprintf.c 2011-01-06 11:45:37.000000000 -0500 +++ xnu-1504.9.26/pexpert/i386/pe_kprintf.c 2011-01-09 16:00:20.000000000 -0500 @@ -28,6 +28,7 @@ /* * file: pe_kprintf.c * i386 platform expert debugging output initialization. + * (kaitek: modified to support output to console log) */ #include #include @@ -36,6 +37,8 @@ #include #include +extern void conslog_putc(char); + /* Globals */ void (*PE_kputc)(char c); @@ -46,6 +49,7 @@ unsigned int disable_serial_output = FALSE; #else unsigned int disable_serial_output = TRUE; +unsigned int enable_conslog_kprintf = FALSE; #endif decl_simple_lock_data(static, kprintf_lock) @@ -62,7 +66,9 @@ simple_lock_init(&kprintf_lock, 0); - if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) + if (PE_parse_boot_argn("kprintf", &boot_arg, sizeof(boot_arg)) && boot_arg) + enable_conslog_kprintf = TRUE; + else if (PE_parse_boot_argn("debug", &boot_arg, sizeof(boot_arg))) if (boot_arg & DB_KPRT) new_disable_serial_output = FALSE; @@ -71,7 +77,7 @@ if (!new_disable_serial_output && (!disable_serial_output || serial_init())) PE_kputc = serial_putc; else - PE_kputc = cnputc; + PE_kputc = conslog_putc; disable_serial_output = new_disable_serial_output; } @@ -102,7 +108,7 @@ va_list listp; boolean_t 
state; - if (!disable_serial_output) { + if (enable_conslog_kprintf || !disable_serial_output) { /* If PE_kputc has not yet been initialized, don't * take any locks, just dump to serial */