assembly – 0x2B|~0x2B

Linux System Call

Posted on 2013-11-08 by gonwan — No Comments ↓

On Linux, the HelloWorld application is much simpler than the Windows one: put the system call number into %eax and the parameters into %ebx, %ecx and %edx, then trigger a 0x80 interrupt.

# gcc -nostdlib syscall_linux.s -o syscall_linux
.global _start

.text

# Program entry point: writes the 13-byte string at `message` to stdout with
# the write system call, then ends the process with exit status 0. Both calls
# are made through int $0x80, with the call number in %eax and the arguments
# in %ebx, %ecx and %edx.
_start:
    # write(1, message, 13)
    mov     $4, %eax            # system call 4 is write
    mov     $1, %ebx            # file handle 1 is stdout
    mov     $message, %ecx      # address of string to output
    mov     $13, %edx           # number of bytes to write
    int     $0x80               # invoke system call  
    # exit(0)
    mov     $1, %eax            # system call 1 is exit
    xor     %ebx, %ebx          # return 0
    int     $0x80               # invoke system call
# The text passed to write above: 12 characters plus the newline. .ascii adds
# no terminating NUL, which is why the length is passed explicitly.
message:
    .ascii  "Hello World!\n"

# gcc -nostdlib syscall_linux.s -o syscall_linux

.global _start

.text

_start:

# write(1, message, 13)

mov $4, %eax # system call 4 is write

mov $1, %ebx # file handle 1 is stdout

mov $message, %ecx # address of string to output

mov $13, %edx # number of bytes to write

int $0x80 # invoke system call

# exit(0)

mov $1, %eax # system call 1 is exit

xor %ebx, %ebx # return 0

int $0x80 # invoke system call

message:

.ascii "Hello World!\n"

Windows System Call Sequence and Simulation

Posted on 2013-11-07 by gonwan — No Comments ↓

There are hundreds of documents explaining how Windows implements its system calls, using int 2e or sysenter. But I could not find any runnable code that shows exactly how it works, so I wrote some myself.

The C code requires only the SDK to compile, since I have copied all the DDK definitions inline. It opens the file C:\test.txt and writes Hello World! to it. Quite simple. I first tried a HelloWorld console application, but its call sequence turned out to be far more complex than I had expected, after I did some reverse engineering and read some code from the ReactOS project (Wine does not help, since it does not implement a Win32-compatible call sequence in the console case). The code is the basis of our further investigation. It invokes NtCreateFile(), NtWriteFile() and NtClose() in ntdll.dll with dynamic loading:

#include <windows.h>
#include <stdio.h>

#define FILE_OVERWRITE_IF               0x00000005
#define FILE_SYNCHRONOUS_IO_NONALERT    0x00000020
#define OBJ_KERNEL_HANDLE               0x00000200L
#define NT_SUCCESS(Status)      ((NTSTATUS)(Status) >= 0)

typedef LONG NTSTATUS;

/*
 * Counted wide-character string, declared inline so that the DDK headers
 * are not needed. main() uses it to hold the path of the file to create.
 */
typedef struct _UNICODE_STRING {
    USHORT Length;          /* size of the string in bytes, terminator not counted */
    USHORT MaximumLength;   /* size of the buffer in bytes; main() sets Length + sizeof(WCHAR) */
    PWSTR  Buffer;          /* the wide characters themselves */
} UNICODE_STRING, *PUNICODE_STRING;

/*
 * Describes the object that NtCreateFile() is asked to open; passed by
 * address as its ObjectAttributes argument.
 */
typedef struct _OBJECT_ATTRIBUTES {
    ULONG Length;                    /* size of this structure; main() sets sizeof(oa) */
    HANDLE RootDirectory;            /* NULL in main(), which supplies a complete path in ObjectName */
    PUNICODE_STRING ObjectName;      /* path of the object as a UNICODE_STRING */
    ULONG Attributes;                /* OBJ_* flag bits */
    PVOID SecurityDescriptor;        // Points to type SECURITY_DESCRIPTOR
    PVOID SecurityQualityOfService;  // Points to type SECURITY_QUALITY_OF_SERVICE
} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;

/*
 * Result block passed by address to NtCreateFile() and NtWriteFile().
 * main() only supplies the storage and never reads it back.
 * NOTE(review): per the Windows documentation the callee stores the final
 * status and a request-dependent value (e.g. bytes transferred) here -
 * confirm before relying on the contents.
 */
typedef struct _IO_STATUS_BLOCK {
    union {                 /* Status and Pointer share the same storage */
        NTSTATUS Status;
        PVOID Pointer;
    };
    ULONG_PTR Information;
} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;

/*
 * Callback type of the ApcRoutine parameter of FnNtWriteFile. main() always
 * passes NULL for it, so no callback of this type is defined in this program.
 */
typedef VOID (NTAPI *PIO_APC_ROUTINE) (
    IN PVOID ApcContext,
    IN PIO_STATUS_BLOCK IoStatusBlock,
    IN ULONG Reserved
);

typedef NTSTATUS (WINAPI *FnNtCreateFile)(
    PHANDLE FileHandle,
    ACCESS_MASK DesiredAccess,
    POBJECT_ATTRIBUTES ObjectAttributes,
    PIO_STATUS_BLOCK IoStatusBlock,
    PLARGE_INTEGER AllocationSize,
    ULONG FileAttributes,
    ULONG ShareAccess,
    ULONG CreateDisposition,
    ULONG CreateOptions,
    PVOID EaBuffer,
    ULONG EaLength
);

typedef NTSTATUS (WINAPI *FnNtWriteFile)(
    HANDLE FileHandle,
    HANDLE Event,
    PIO_APC_ROUTINE ApcRoutine,
    PVOID ApcContext,
    PIO_STATUS_BLOCK IoStatusBlock,
    PVOID Buffer,
    ULONG Length,
    PLARGE_INTEGER ByteOffset,
    PULONG Key
);

typedef NTSTATUS (WINAPI *FnNtClose)(
    HANDLE Handle
);

/*
 * Creates (or overwrites) C:\test.txt and writes "Hello World!" to it by
 * calling NtCreateFile(), NtWriteFile() and NtClose() directly. The three
 * entry points are looked up in ntdll.dll at run time.
 *
 * Returns 0 on success. Returns -1 when ntdll.dll cannot be loaded, when an
 * entry point is missing, or when the create or write call fails; in the
 * last two cases the NTSTATUS is printed to stderr.
 */
int main()
{
    HMODULE hModule;
    FnNtCreateFile pfnNtCreateFile;
    FnNtWriteFile pfnNtWriteFile;
    FnNtClose pfnNtClose;
    NTSTATUS ntStatus;
    UNICODE_STRING us;
    OBJECT_ATTRIBUTES oa;
    IO_STATUS_BLOCK ioStatusBlock;
    HANDLE hFile;
    char szHello[] = "Hello World!";
    int exitCode = -1;

    hModule = LoadLibraryA("ntdll.dll");  /* always 0x7c900000 on XP */
    if (hModule == NULL) {
        return -1;
    }
    pfnNtCreateFile = (FnNtCreateFile)GetProcAddress(hModule, "NtCreateFile");  /* 0x7c90d090 */
    pfnNtWriteFile = (FnNtWriteFile)GetProcAddress(hModule, "NtWriteFile");  /* 0x7c90df60 */
    pfnNtClose = (FnNtClose)GetProcAddress(hModule, "NtClose");  /* 0x7c90cfd0 */
    if (pfnNtCreateFile == NULL || pfnNtWriteFile == NULL || pfnNtClose == NULL) {
        goto cleanup;
    }

    /* The native API takes an NT object path, hence the \??\ prefix. */
    us.Buffer = L"\\??\\C:\\test.txt";
    us.Length = (USHORT)(wcslen(us.Buffer) * sizeof(WCHAR));    /* bytes, terminator excluded */
    us.MaximumLength = (USHORT)(us.Length + sizeof(WCHAR));     /* bytes, terminator included */
    oa.Length = sizeof(oa);
    oa.RootDirectory = NULL;
    oa.ObjectName = &us;
    oa.Attributes = OBJ_KERNEL_HANDLE;
    oa.SecurityDescriptor = NULL;
    oa.SecurityQualityOfService = NULL;

    ntStatus = pfnNtCreateFile(&hFile,
        GENERIC_ALL | SYNCHRONIZE,
        &oa,
        &ioStatusBlock,
        NULL,
        FILE_ATTRIBUTE_NORMAL,
        0,
        FILE_OVERWRITE_IF,
        FILE_SYNCHRONOUS_IO_NONALERT,
        NULL,
        0);
    if (!NT_SUCCESS(ntStatus)) {
        /* NTSTATUS is a LONG, so print it with an l-length conversion. */
        fprintf(stderr, "Failed to create file, error = 0x%lx\n", (unsigned long)ntStatus);
        goto cleanup;
    }

    ntStatus = pfnNtWriteFile(hFile,
        NULL,
        NULL,
        NULL,
        &ioStatusBlock,
        szHello,
        (ULONG)strlen(szHello),
        NULL,
        NULL);
    if (!NT_SUCCESS(ntStatus)) {
        fprintf(stderr, "Failed to write file, error = 0x%lx\n", (unsigned long)ntStatus);
    } else {
        exitCode = 0;
    }
    /* Release the handle whether or not the write succeeded. */
    pfnNtClose(hFile);

cleanup:
    FreeLibrary(hModule);
    return exitCode;
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

#include <windows.h>

#include <stdio.h>

#define FILE_OVERWRITE_IF 0x00000005

#define FILE_SYNCHRONOUS_IO_NONALERT 0x00000020

#define OBJ_KERNEL_HANDLE 0x00000200L

#define NT_SUCCESS(Status) ((NTSTATUS)(Status) >= 0)

typedef LONG NTSTATUS;

typedef struct _UNICODE_STRING {

USHORT Length;

USHORT MaximumLength;

PWSTR Buffer;

} UNICODE_STRING, *PUNICODE_STRING;

typedef struct _OBJECT_ATTRIBUTES {

ULONG Length;

HANDLE RootDirectory;

PUNICODE_STRING ObjectName;

ULONG Attributes;

PVOID SecurityDescriptor; // Points to type SECURITY_DESCRIPTOR

PVOID SecurityQualityOfService; // Points to type SECURITY_QUALITY_OF_SERVICE

} OBJECT_ATTRIBUTES, *POBJECT_ATTRIBUTES;

typedef struct _IO_STATUS_BLOCK {

union {

NTSTATUS Status;

PVOID Pointer;

};

ULONG_PTR Information;

} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK;

typedef VOID (NTAPI *PIO_APC_ROUTINE) (

IN PVOID ApcContext,

IN PIO_STATUS_BLOCK IoStatusBlock,

IN ULONG Reserved

);

typedef NTSTATUS (WINAPI *FnNtCreateFile)(

PHANDLE FileHandle,

ACCESS_MASK DesiredAccess,

POBJECT_ATTRIBUTES ObjectAttributes,

PIO_STATUS_BLOCK IoStatusBlock,

PLARGE_INTEGER AllocationSize,

ULONG FileAttributes,

ULONG ShareAccess,

ULONG CreateDisposition,

ULONG CreateOptions,

PVOID EaBuffer,

ULONG EaLength

);

typedef NTSTATUS (WINAPI *FnNtWriteFile)(

HANDLE FileHandle,

HANDLE Event,

PIO_APC_ROUTINE ApcRoutine,

PVOID ApcContext,

PIO_STATUS_BLOCK IoStatusBlock,

PVOID Buffer,

ULONG Length,

PLARGE_INTEGER ByteOffset,

PULONG Key

);

typedef NTSTATUS (WINAPI *FnNtClose)(

HANDLE Handle

);

/*
 * Creates (or overwrites) C:\test.txt and writes "Hello World!" to it by
 * calling NtCreateFile(), NtWriteFile() and NtClose() directly. The three
 * entry points are looked up in ntdll.dll at run time.
 *
 * Returns 0 on success. Returns -1 when ntdll.dll cannot be loaded, when an
 * entry point is missing, or when the create or write call fails; in the
 * last two cases the NTSTATUS is printed to stderr.
 */
int main()
{
    HMODULE hModule;
    FnNtCreateFile pfnNtCreateFile;
    FnNtWriteFile pfnNtWriteFile;
    FnNtClose pfnNtClose;
    NTSTATUS ntStatus;
    UNICODE_STRING us;
    OBJECT_ATTRIBUTES oa;
    IO_STATUS_BLOCK ioStatusBlock;
    HANDLE hFile;
    char szHello[] = "Hello World!";
    int exitCode = -1;

    hModule = LoadLibraryA("ntdll.dll"); /* always 0x7c900000 on XP */
    if (hModule == NULL) {
        return -1;
    }
    pfnNtCreateFile = (FnNtCreateFile)GetProcAddress(hModule, "NtCreateFile"); /* 0x7c90d090 */
    pfnNtWriteFile = (FnNtWriteFile)GetProcAddress(hModule, "NtWriteFile"); /* 0x7c90df60 */
    pfnNtClose = (FnNtClose)GetProcAddress(hModule, "NtClose"); /* 0x7c90cfd0 */
    if (pfnNtCreateFile == NULL || pfnNtWriteFile == NULL || pfnNtClose == NULL) {
        goto cleanup;
    }

    /* The native API takes an NT object path, hence the \??\ prefix. */
    us.Buffer = L"\\??\\C:\\test.txt";
    us.Length = (USHORT)(wcslen(us.Buffer) * sizeof(WCHAR));    /* bytes, terminator excluded */
    us.MaximumLength = (USHORT)(us.Length + sizeof(WCHAR));     /* bytes, terminator included */
    oa.Length = sizeof(oa);
    oa.RootDirectory = NULL;
    oa.ObjectName = &us;
    oa.Attributes = OBJ_KERNEL_HANDLE;
    oa.SecurityDescriptor = NULL;
    oa.SecurityQualityOfService = NULL;

    /* All eleven arguments of FnNtCreateFile, in declaration order. */
    ntStatus = pfnNtCreateFile(&hFile,
        GENERIC_ALL | SYNCHRONIZE,
        &oa,
        &ioStatusBlock,
        NULL,
        FILE_ATTRIBUTE_NORMAL,
        0,
        FILE_OVERWRITE_IF,
        FILE_SYNCHRONOUS_IO_NONALERT,
        NULL,
        0);
    if (!NT_SUCCESS(ntStatus)) {
        /* NTSTATUS is a LONG, so print it with an l-length conversion. */
        fprintf(stderr, "Failed to create file, error = 0x%lx\n", (unsigned long)ntStatus);
        goto cleanup;
    }

    /* All nine arguments of FnNtWriteFile, in declaration order. */
    ntStatus = pfnNtWriteFile(hFile,
        NULL,
        NULL,
        NULL,
        &ioStatusBlock,
        szHello,
        (ULONG)strlen(szHello),
        NULL,
        NULL);
    if (!NT_SUCCESS(ntStatus)) {
        fprintf(stderr, "Failed to write file, error = 0x%lx\n", (unsigned long)ntStatus);
    } else {
        exitCode = 0;
    }
    /* Release the handle whether or not the write succeeded. */
    pfnNtClose(hFile);

cleanup:
    FreeLibrary(hModule);
    return exitCode;
}

I found that the module handle and all three function pointers are always the same, at least on my Windows XP (SP3) machine. This is probably caused by the preferred base address of ntdll.dll. The code should work on all Windows platforms, since it has no hardcoded values.

Now, translate the C code into assembly. Error handling is omitted:

.386
.model flat,stdcall
.data

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;            SDK prototypes            ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
NULL            EQU 0

; Assembly counterpart of the C UNICODE_STRING: two 16-bit sizes followed by
; a 32-bit pointer to the characters. The first field is called Len instead
; of Length (presumably because LENGTH is a MASM operator - confirm).
UNICODE_STRING STRUCT
    Len             WORD ?
    MaximumLength   WORD ?
    Buffer          DWORD ?
UNICODE_STRING ENDS

; Assembly counterpart of the C OBJECT_ATTRIBUTES: six DWORD fields in the
; same order. main fills all of them before calling NtCreateFile.
OBJECT_ATTRIBUTES STRUCT
    Len                         DWORD ?
    RootDirectory               DWORD ?
    ObjectName                  DWORD ?
    Attributes                  DWORD ?
    SecurityDescriptor          DWORD ?
    SecurityQualityOfService    DWORD ?
OBJECT_ATTRIBUTES ENDS

; Two-DWORD result block whose address is passed to NtCreateFile and
; NtWriteFile. In the C declaration Status and Pointer form a union and
; therefore share the first DWORD; the second DWORD is Information, so that
; is the name used for it here.
IO_STATUS_BLOCK STRUCT
    Status      DWORD ?
    Information DWORD ?
IO_STATUS_BLOCK ENDS

ExitProcess PROTO :DWORD

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;         Program declarations         ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; IMPORTANT: The padding is required!!
; NOTE(review): the four zero bytes bring STR_HELLO to 16 bytes, so the WORD
; string STR_FILE that follows starts at an even offset; presumably that is
; why the program fails without the padding - confirm.
STR_HELLO           DB      "Hello World!",0,0,0,0
; "\??\C:\test.txt" stored as 15 16-bit characters plus a terminating 0.
STR_FILE            WORD    "\","?","?","\","C",":","\","t","e","s","t",".","t","x","t",0

.code

; System-call stub for NtCreateFile. The caller pushes the 11 DWORD arguments;
; the stub only selects the service and enters the system-call code.
NtCreateFile PROC
    ; 25h(XP) or 42h(Win7)
    mov eax, 25h            ; system service number
    mov edx, 7ffe0300h      ; location that holds a pointer to the system-call entry code
    call DWORD PTR [edx]    ; call through that pointer
    retn 2ch                ; return and pop 2ch = 44 bytes (11 DWORD arguments)
NtCreateFile ENDP

; System-call stub for NtWriteFile. The caller pushes the 9 DWORD arguments;
; the stub only selects the service and enters the system-call code.
NtWriteFile PROC
    ; 112h(XP) or 18ch(Win7)
    mov eax, 112h           ; system service number
    mov edx, 7ffe0300h      ; location that holds a pointer to the system-call entry code
    call DWORD PTR [edx]    ; call through that pointer
    retn 24h                ; return and pop 24h = 36 bytes (9 DWORD arguments)
NtWriteFile ENDP

; System-call stub for NtClose. The caller pushes the single handle argument;
; the stub only selects the service and enters the system-call code.
NtClose PROC
    ; 19h(XP) or 32h(Win7)
    mov eax, 19h            ; system service number
    mov edx, 7ffe0300h      ; location that holds a pointer to the system-call entry code
    call DWORD PTR [edx]    ; call through that pointer
    retn 4h                 ; return and pop 4 bytes (1 DWORD argument)
NtClose ENDP

; Program entry point: the assembly version of the C main() without error
; handling. It fills a UNICODE_STRING and an OBJECT_ATTRIBUTES on the stack,
; then calls the three stubs above to create C:\test.txt, write the 12 bytes
; of STR_HELLO to it and close the handle. Arguments are pushed last-to-first,
; so the push order is the reverse of the C parameter lists.
main PROC
    ;LOCAL ntStatus:DWORD
    LOCAL us:UNICODE_STRING
    LOCAL oa:OBJECT_ATTRIBUTES
    LOCAL ioStatusBlock:IO_STATUS_BLOCK
    LOCAL hFile:DWORD
    ; 1. initialization
    mov us.Buffer, OFFSET STR_FILE
    ; 15 characters * 2 bytes, terminator not counted
    mov us.Len, 30
    ; same plus the 2-byte terminator
    mov us.MaximumLength, 32
    mov oa.Len, TYPE OBJECT_ATTRIBUTES
    mov oa.RootDirectory, NULL
    lea eax, [us]
    mov oa.ObjectName, eax
    ; OBJ_KERNEL_HANDLE
    mov oa.Attributes, 200h
    mov oa.SecurityDescriptor, NULL
    mov oa.SecurityQualityOfService, NULL
    ; 2. parameters of NtCreateFile
    ; EaLength
    push 0
    ; EaBuffer
    push NULL
    ; FILE_SYNCHRONOUS_IO_NONALERT
    push 20h
    ; FILE_OVERWRITE_IF
    push 5h
    ; ShareAccess
    push 0
    ; FILE_ATTRIBUTE_NORMAL
    push 80h      
    ; AllocationSize
    push NULL
    lea eax, [ioStatusBlock]
    push eax
    lea eax, [oa]
    push eax
    ; GENERIC_ALL | SYNCHRONIZE
    push 10100000h
    lea eax, [hFile]
    push eax
    ; 3. call NtCreateFile
    call NtCreateFile
    ; 4. parameters of NtWriteFile
    ; Key, ByteOffset
    push NULL
    push NULL
    ; Length: the 12 characters of "Hello World!"
    push 12
    push OFFSET STR_HELLO
    lea eax, [ioStatusBlock]
    push eax
    ; ApcContext, ApcRoutine, Event
    push NULL
    push NULL
    push NULL
    push hFile
    ; 5. call NtWriteFile
    call NtWriteFile
    ; 6. parameters of NtClose
    push hFile
    ; 7. call NtClose
    call NtClose
    ; 8. Exit
    ;INVOKE ExitProcess, 0
    ret
main ENDP

END main

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

.386

.model flat,stdcall

.data

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; SDK prototypes ;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

NULL EQU 0

UNICODE_STRING STRUCT

Len WORD ?

MaximumLength WORD ?

Buffer DWORD ?

UNICODE_STRING ENDS

OBJECT_ATTRIBUTES STRUCT

Len DWORD ?

RootDirectory DWORD ?

ObjectName DWORD ?

Attributes DWORD ?

SecurityDescriptor DWORD ?

SecurityQualityOfService DWORD ?

OBJECT_ATTRIBUTES ENDS

; Two-DWORD result block whose address is passed to NtCreateFile and
; NtWriteFile. In the C declaration Status and Pointer form a union and
; therefore share the first DWORD; the second DWORD is Information, so that
; is the name used for it here.
IO_STATUS_BLOCK STRUCT
    Status      DWORD ?
    Information DWORD ?
IO_STATUS_BLOCK ENDS

ExitProcess PROTO :DWORD

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Program declarations ;

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; IMPORTANT: The paddding is required!!

STR_HELLO DB "Hello World!",0,0,0,0

STR_FILE WORD "\","?","?","\","C",":","\","t","e","s","t",".","t","x","t",0

.code

NtCreateFile PROC

; 25h(XP) or 42h(Win7)

mov eax, 25h

mov edx, 7ffe0300h

call DWORD PTR [edx]

retn 2ch

NtCreateFile ENDP

NtWriteFile PROC

; 112h(XP) or 18ch(Win7)

mov eax, 112h

mov edx, 7ffe0300h

call DWORD PTR [edx]

retn 24h

NtWriteFile ENDP

NtClose PROC

; 19h(XP) or 32h(Win7)

mov eax, 19h

mov edx, 7ffe0300h

call DWORD PTR [edx]

retn 4h

NtClose ENDP

; Program entry point: the assembly version of the C main() without error
; handling. It fills a UNICODE_STRING and an OBJECT_ATTRIBUTES on the stack,
; then calls the three stubs above to create C:\test.txt, write the 12 bytes
; of STR_HELLO to it and close the handle. Arguments are pushed last-to-first,
; so the push order is the reverse of the C parameter lists. Each stub pops
; its own arguments (retn 2ch / 24h / 4h), so every call must be preceded by
; exactly 11, 9 and 1 pushes respectively.
main PROC
    ;LOCAL ntStatus:DWORD
    LOCAL us:UNICODE_STRING
    LOCAL oa:OBJECT_ATTRIBUTES
    LOCAL ioStatusBlock:IO_STATUS_BLOCK
    LOCAL hFile:DWORD
    ; 1. initialization
    mov us.Buffer, OFFSET STR_FILE
    ; 15 characters * 2 bytes, terminator not counted
    mov us.Len, 30
    ; same plus the 2-byte terminator
    mov us.MaximumLength, 32
    mov oa.Len, TYPE OBJECT_ATTRIBUTES
    mov oa.RootDirectory, NULL
    lea eax, [us]
    mov oa.ObjectName, eax
    ; OBJ_KERNEL_HANDLE
    mov oa.Attributes, 200h
    mov oa.SecurityDescriptor, NULL
    mov oa.SecurityQualityOfService, NULL
    ; 2. parameters of NtCreateFile (11 DWORDs)
    ; EaLength
    push 0
    ; EaBuffer
    push NULL
    ; FILE_SYNCHRONOUS_IO_NONALERT
    push 20h
    ; FILE_OVERWRITE_IF
    push 5h
    ; ShareAccess
    push 0
    ; FILE_ATTRIBUTE_NORMAL
    push 80h
    ; AllocationSize
    push NULL
    lea eax, [ioStatusBlock]
    push eax
    lea eax, [oa]
    push eax
    ; GENERIC_ALL | SYNCHRONIZE
    push 10100000h
    lea eax, [hFile]
    push eax
    ; 3. call NtCreateFile
    call NtCreateFile
    ; 4. parameters of NtWriteFile (9 DWORDs)
    ; Key, ByteOffset
    push NULL
    push NULL
    ; Length: the 12 characters of "Hello World!"
    push 12
    push OFFSET STR_HELLO
    lea eax, [ioStatusBlock]
    push eax
    ; ApcContext, ApcRoutine, Event
    push NULL
    push NULL
    push NULL
    push hFile
    ; 5. call NtWriteFile
    call NtWriteFile
    ; 6. parameters of NtClose
    push hFile
    ; 7. call NtClose
    call NtClose
    ; 8. Exit
    ;INVOKE ExitProcess, 0
    ret
main ENDP

END main

Compile the code with:

# ml /c testnt.asm
# link /subsystem:console testnt.obj

1 2	# ml /c testnt.asm # link /subsystem:console testnt.obj

The assembly code of NtCreateFile(), NtWriteFile() and NtClose() is copied directly from ntdll.dll. For NtCreateFile(), 25h is the system service number, which is used to index into the KiServiceTable (SSDT, System Service Dispatch Table) to locate the kernel function that handles the call.

System service numbers vary between Windows versions. This is why using them directly to invoke system calls is not recommended; I only demonstrate the approach here. For Windows XP, the values of the three numbers are 25h, 112h and 19h, while for Windows 7 they are 42h, 18ch and 32h. Change them yourself if you’re running Windows 7. For a complete list of system service numbers, refer here or disassemble your ntdll.dll manually :). The output executable is a tiny one, only 3KB in size, since it does not use the CRT. Moreover, it has an empty list of imported functions!

At 7ffe0300h is a pointer to the following code:

mov edx, esp
sysenter

1 2	mov edx, esp sysenter

NOTE: The assembly code may work only when compiled as a 32-bit application. 64-bit mode is not tested and needs modification to work.

One last point: it seems the STR_HELLO string is required to be aligned on an 8-byte boundary. Otherwise, you will get the error code 0x80000002 (STATUS_DATATYPE_MISALIGNMENT).

Compiler Intrinsic Functions

Posted on 2013-10-30 by gonwan — No Comments ↓

Copied from Wikipedia:

An intrinsic function is a function available for use in a given programming language whose implementation is handled specially by the compiler. Typically, it substitutes a sequence of automatically generated instructions for the original function call, similar to an inline function. Unlike an inline function though, the compiler has an intimate knowledge of the intrinsic function and can therefore better integrate it and optimize it for the situation. This is also called builtin function in many languages.

A code snippet is written to check the code generation when intrinsic is enabled or not:

/*
 * # gcc -S intrinsic.c -o intrinsic.s
 * # gcc -S -fno-builtin intrinsic.c -o intrinsic2.s
 * # cl /c /Oi intrinsic.c /FAs /Faintrinsic.asm
 * # cl /c intrinsic.c /FAs /Faintrinsic2.asm
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char *c = "Hello World!";
char c2[16];

/*
 * Test subject for the intrinsic comparison: it calls abs() and memcpy() so
 * that the generated assembly shows whether the compiler expanded them inline
 * or emitted real calls, then prints both results with printf().
 */
int main(int argc, char *argv[])
{
    int a = abs(argc);
    /* Copies the 12 characters of "Hello World!" without its NUL; c2 is a
     * file-scope array and therefore starts out zero-filled, so the copied
     * text is still terminated when printed with %s. */
    memcpy(c2, c, 12);
    printf("%d,%s\n", a, c2);
    return 0;
}

* # gcc -S intrinsic.c -o intrinsic.s

* # gcc -S -fno-builtin intrinsic.c -o intrinsic2.s

* # cl /c /Oi intrinsic.c /FAs /Faintrinsic.asm

* # cl /c intrinsic.c /FAs /Faintrinsic2.asm

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

const char *c = "Hello World!";

char c2[16];

int main(int argc, char *argv[])

{

int a = abs(argc);

memcpy(c2, c, 12);

printf("%d,%s\n", a, c2);

return 0;

}

Generated assembly:

# gcc output for main() with builtins enabled: abs() and memcpy() have been
# expanded inline and printf is the only call left.
main:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $32, %esp
    # inline abs(argc): mask = argc >> 31 (arithmetic), a = (argc ^ mask) - mask
    movl    8(%ebp), %eax
    sarl    $31, %eax
    movl    %eax, %edx
    xorl    8(%ebp), %edx
    movl    %edx, 28(%esp)
    subl    %eax, 28(%esp)
    # inline memcpy(c2, c, 12): three 4-byte load/store pairs
    movl    c, %eax
    movl    %eax, %edx
    movl    $c2, %eax
    movl    (%edx), %ecx
    movl    %ecx, (%eax)
    movl    4(%edx), %ecx
    movl    %ecx, 4(%eax)
    movl    8(%edx), %edx
    movl    %edx, 8(%eax)
    # printf(.LC1, a, c2): arguments stored at (%esp), 4(%esp), 8(%esp)
    movl    $.LC1, %eax
    movl    $c2, 8(%esp)
    movl    28(%esp), %edx
    movl    %edx, 4(%esp)
    movl    %eax, (%esp)
    call    printf
    # return 0
    movl    $0, %eax
    leave
    ret

main:

pushl %ebp

movl %esp, %ebp

andl $-16, %esp

subl $32, %esp

movl 8(%ebp), %eax

sarl $31, %eax

movl %eax, %edx

xorl 8(%ebp), %edx

movl %edx, 28(%esp)

subl %eax, 28(%esp)

movl c, %eax

movl %eax, %edx

movl $c2, %eax

movl (%edx), %ecx

movl %ecx, (%eax)

movl 4(%edx), %ecx

movl %ecx, 4(%eax)

movl 8(%edx), %edx

movl %edx, 8(%eax)

movl $.LC1, %eax

movl $c2, 8(%esp)

movl 28(%esp), %edx

movl %edx, 4(%esp)

movl %eax, (%esp)

call printf

movl $0, %eax

leave

ret

Only printf() appears as a call in the code. There are no calls to abs() or memcpy(), since they are intrinsics, as listed here in gcc’s online documentation.

Intrinsics can be explicitly disabled. For instance, CRT intrinsics must be disabled for kernel development. Add the -fno-builtin flag to gcc, or remove the /Oi switch in MSVC. Only the code generated by gcc is pasted here:

# gcc output for the same main() built with -fno-builtin: abs() and memcpy()
# are now ordinary calls, like printf.
main:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $32, %esp
    # a = abs(argc); the result is kept at 28(%esp)
    movl    8(%ebp), %eax
    movl    %eax, (%esp)
    call    abs
    movl    %eax, 28(%esp)
    # memcpy(c2, c, 12)
    movl    c, %eax
    movl    %eax, %edx
    movl    $c2, %eax
    movl    $12, 8(%esp)
    movl    %edx, 4(%esp)
    movl    %eax, (%esp)
    call    memcpy
    # printf(.LC1, a, c2)
    movl    $.LC1, %eax
    movl    $c2, 8(%esp)
    movl    28(%esp), %edx
    movl    %edx, 4(%esp)
    movl    %eax, (%esp)
    call    printf
    # return 0
    movl    $0, %eax
    leave
    ret

main:

pushl %ebp

movl %esp, %ebp

andl $-16, %esp

subl $32, %esp

movl 8(%ebp), %eax

movl %eax, (%esp)

call abs

movl %eax, 28(%esp)

movl c, %eax

movl %eax, %edx

movl $c2, %eax

movl $12, 8(%esp)

movl %edx, 4(%esp)

movl %eax, (%esp)

call memcpy

movl $.LC1, %eax

movl $c2, 8(%esp)

movl 28(%esp), %edx

movl %edx, 4(%esp)

movl %eax, (%esp)

call printf

movl $0, %eax

leave

ret

Now there _are_ calls to abs() and memcpy(). The general MSVC intrinsics can be found here.

Intrinsics are easier to use than inline assembly, and in most cases they are used to improve performance. Both gcc and MSVC provide intrinsic support for Intel’s MMX, SSE and SSE2 instruction sets. A code snippet that uses MMX:

/*
 * # gcc -O2 -S -mmmx intrinsic_mmx.c -o intrinsic_mmx.s
 * # cl /O2 /c intrinsic_mmx.c /FAs /Faintrinsic_mmx.asm
 */
#include <stdio.h>
#include <mmintrin.h>

/*
 * MMX intrinsic demo: treats two pairs of ints as two 64-bit MMX values,
 * adds them with a single _mm_add_pi32(), extracts the two 32-bit halves of
 * the result and prints them.
 */
int main()
{
    __m64 m1, m2, m3;
    int out1, out2;
    int in1[] = { 222, 111 };
    int in2[] = { 444, 333 };
#if 0
    /* Disabled alternative: build the values element by element. */
    m1 = _mm_setr_pi32(in1[0], in1[1]);
    m2 = _mm_setr_pi32(in2[0], in2[1]);
#else
    /* Active path: reinterpret each 2-int array as one __m64. */
    m1 = *(__m64 *)in1;
    m2 = *(__m64 *)in2;
#endif
    m3 = _mm_add_pi32(m1, m2); 
    out1 = _mm_cvtsi64_si32(m3);    /* low 32 bits of the sum */
    m3  = _mm_srli_si64(m3, 32);    /* shift the high half down ... */
    out2 = _mm_cvtsi64_si32(m3);    /* ... and read it the same way */
    _mm_empty();                    /* shows up as emms in the generated assembly */
    printf("out1=%d,out2=%d\n", out1, out2);
    return 0;
}

* # gcc -O2 -S -mmmx intrinsic_mmx.c -o intrinsic_mmx.s

* # cl /O2 /c intrinsic_mmx.c /FAs /Faintrinsic_mmx.asm

#include <stdio.h>

#include <mmintrin.h>

int main()

{

__m64 m1, m2, m3;

int out1, out2;

int in1[] = { 222, 111 };

int in2[] = { 444, 333 };

#if 0

m1 = _mm_setr_pi32(in1[0], in1[1]);

m2 = _mm_setr_pi32(in2[0], in2[1]);

#else

m1 = *(__m64 *)in1;

m2 = *(__m64 *)in2;

#endif

m3 = _mm_add_pi32(m1, m2);

out1 = _mm_cvtsi64_si32(m3);

m3 = _mm_srli_si64(m3, 32);

out2 = _mm_cvtsi64_si32(m3);

_mm_empty();

printf("out1=%d,out2=%d\n", out1, out2);

return 0;

}

Assembly looks like:

# gcc -O2 -mmmx output for the MMX demo.
# NOTE(review): .LC1 and .LC2 are presumably the in1/in2 arrays turned into
# constants by the optimizer - their definitions are not shown; confirm.
main:
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $16, %esp
    # m3 = m1 + m2, held in %mm0
    movq    .LC1, %mm0
    paddd   .LC2, %mm0
    # out1: low 32 bits, stored straight into the third argument slot
    movd    %mm0, 8(%esp)
    # out2: shift the high 32 bits down, store into the fourth argument slot
    psrlq   $32, %mm0
    movd    %mm0, 12(%esp)
    # _mm_empty()
    emms
    # __printf_chk(1, .LC0, out1, out2)
    movl    $.LC0, 4(%esp)
    movl    $1, (%esp)
    call    __printf_chk
    # return 0
    xorl    %eax, %eax
    leave
    ret

main:

pushl %ebp

movl %esp, %ebp

andl $-16, %esp

subl $16, %esp

movq .LC1, %mm0

paddd .LC2, %mm0

movd %mm0, 8(%esp)

psrlq $32, %mm0

movd %mm0, 12(%esp)

emms

movl $.LC0, 4(%esp)

movl $1, (%esp)

call __printf_chk

xorl %eax, %eax

leave

ret

You can see MMX registers and instructions this time. The -mmmx flag is required to build with gcc. MSVC also generates similar code. A reference for these instruction sets is available on Intel’s website.

A simple benchmark that uses SSE is available here.

Jump Instructions and EFLAGS

Posted on 2013-10-29 by gonwan — No Comments ↓

I used to have a misconception about conditional jumps: I thought they only checked the result of CMP and TEST instructions. So when one appeared after another instruction such as ADD or SUB, I had no clue how it worked.

Actually, a conditional jump checks flags in the EFLAGS control register. From Intel’s manual, vol 1, 3.4.3:

The status flags (bits 0, 2, 4, 6, 7, and 11) of the EFLAGS register indicate the results of arithmetic instructions, such as the ADD, SUB, MUL, and DIV instructions. The status flag functions are:

CF (bit 0) Carry flag: Set if an arithmetic operation generates a carry or a borrow out of the most-significant bit of the result; cleared otherwise. This flag indicates an overflow condition for unsigned-integer arithmetic. It is also used in multiple-precision arithmetic.

PF (bit 2) Parity flag: Set if the least-significant byte of the result contains an even number of 1 bits; cleared otherwise.
AF (bit 4) Adjust flag: Set if an arithmetic operation generates a carry or a borrow out of bit 3 of the result; cleared otherwise. This flag is used in binary-coded decimal (BCD) arithmetic.

ZF (bit 6) Zero flag: Set if the result is zero; cleared otherwise.

SF (bit 7) Sign flag: Set equal to the most-significant bit of the result, which is the sign bit of a signed integer. (0 indicates a positive value and 1 indicates a negative value.)

OF (bit 11) Overflow flag: Set if the integer result is too large a positive number or too small a negative number (excluding the sign-bit) to fit in the destination operand; cleared otherwise. This flag indicates an overflow condition for signed-integer (two’s complement) arithmetic.

And again from vol 2a, section Jcc Jump if Condition is met, more details. I just copy content from here:

Instruction	Description	signed?	Flags	short jump opcodes	near jump opcodes
JO	Jump if overflow		OF = 1	70	0F 80
JNO	Jump if not overflow		OF = 0	71	0F 81
JS	Jump if sign		SF = 1	78	0F 88
JNS	Jump if not sign		SF = 0	79	0F 89
JE JZ	Jump if equal Jump if zero		ZF = 1	74	0F 84
JNE JNZ	Jump if not equal Jump if not zero		ZF = 0	75	0F 85
JB JNAE JC	Jump if below Jump if not above or equal Jump if carry	unsigned	CF = 1	72	0F 82
JNB JAE JNC	Jump if not below Jump if above or equal Jump if not carry	unsigned	CF = 0	73	0F 83
JBE JNA	Jump if below or equal Jump if not above	unsigned	CF = 1 or ZF = 1	76	0F 86
JA JNBE	Jump if above Jump if not below or equal	unsigned	CF = 0 and ZF = 0	77	0F 87
JL JNGE	Jump if less Jump if not greater or equal	signed	SF <> OF	7C	0F 8C
JGE JNL	Jump if greater or equal Jump if not less	signed	SF = OF	7D	0F 8D
JLE JNG	Jump if less or equal Jump if not greater	signed	ZF = 1 or SF <> OF	7E	0F 8E
JG JNLE	Jump if greater Jump if not less or equal	signed	ZF = 0 and SF = OF	7F	0F 8F
JP JPE	Jump if parity Jump if parity even		PF = 1	7A	0F 8A
JNP JPO	Jump if not parity Jump if parity odd		PF = 0	7B	0F 8B
JCXZ JECXZ	Jump if %CX register is 0 Jump if %ECX register is 0		%CX = 0 %ECX = 0	E3	E3

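There are signed and unsigned versions of the comparisons: JA vs. JG, JB vs. JL, etc. Let’s take JA and JG to explain the difference, assuming the flags were set by a comparison that subtracts the second operand from the first. For JA, it’s clear that it requires CF=0 (no borrow) and ZF=0 (not equal). For JG, when the two operands are both positive or both negative, the subtraction cannot overflow, so it requires ZF=0 and SF=OF=0. When the two operands have different signs, it requires ZF=0 and the first operand to be positive; the subtraction then either does not overflow (SF=OF=0) or overflows (SF=OF=1), so in both cases SF=OF.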

Note that the following two lines (AT&T syntax) are equivalent. The CPU just does the arithmetic; it does not care whether the values are signed or unsigned, it only sets the flags. It is we who make the signed or unsigned decision by choosing the jump instruction.

movl $-1, %eax
movl $0xffffffff, %eax

1 2	movl $-1, %eax movl $0xffffffff, %eax

Last, I’d like to use ndisasm (install the nasm package to get it) to illustrate how jump instructions are encoded, including short, near and far jumps:

# echo -e "\x74\x00" | ndisasm -
00000000  7400              jz 0x2
00000002  0A                db 0x0a
# echo -e "\x74\xfe" | ndisasm -
00000000  74FE              jz 0x0
00000002  0A                db 0x0a
# echo -e "\x0f\x84\x00\x00" | ndisasm -
00000000  0F840000          jz word 0x4
00000004  0A                db 0x0a
# echo -e "\x0f\x84\xfc\xff" | ndisasm -
00000000  0F84FCFF          jz word 0x0
00000004  0A                db 0x0a
# echo -e "\x0f\x84\x00\x00\x00\x00" | ndisasm - -b 32
00000000  0F8400000000      jz dword 0x6
00000006  0A                db 0x0a
# echo -e "\x0f\x84\xfa\xff\xff\xff" | ndisasm - -b 32
00000000  0F84FAFFFFFF      jz dword 0x0
00000006  0A                db 0x0a
# echo -e "\xeb\x00" | ndisasm -
00000000  EB00              jmp short 0x2
00000002  0A                db 0x0a
# echo -e "\xe9\x00\x00" | ndisasm -
00000000  E90000            jmp word 0x3
00000003  0A                db 0x0a
# echo -e "\xe9\x00\x00\x00\x00" | ndisasm - -b32
00000000  E900000000        jmp dword 0x5
00000005  0A                db 0x0a
# echo -e "\xea\x00\x00\x34\x12" | ndisasm -
00000000  EA00003412        jmp word 0x1234:0x0
00000005  0A                db 0x0a
# echo -e "\xea\x00\x00\x00\x00\x34\x12" | ndisasm - -b 32
00000000  EA000000003412    jmp dword 0x1234:0x0
00000007  0A                db 0x0a

# echo -e "\x74\x00" | ndisasm -

00000000 7400 jz 0x2

00000002 0A db 0x0a

# echo -e "\x74\xfe" | ndisasm -

00000000 74FE jz 0x0

00000002 0A db 0x0a

# echo -e "\x0f\x84\x00\x00" | ndisasm -

00000000 0F840000 jz word 0x4

00000004 0A db 0x0a

# echo -e "\x0f\x84\xfc\xff" | ndisasm -

00000000 0F84FCFF jz word 0x0

00000004 0A db 0x0a

# echo -e "\x0f\x84\x00\x00\x00\x00" | ndisasm - -b 32

00000000 0F8400000000 jz dword 0x6

00000006 0A db 0x0a

# echo -e "\x0f\x84\xfa\xff\xff\xff" | ndisasm - -b 32

00000000 0F84FAFFFFFF jz dword 0x0

00000006 0A db 0x0a

# echo -e "\xeb\x00" | ndisasm -

00000000 EB00 jmp short 0x2

00000002 0A db 0x0a

# echo -e "\xe9\x00\x00" | ndisasm -

00000000 E90000 jmp word 0x3

00000003 0A db 0x0a

# echo -e "\xe9\x00\x00\x00\x00" | ndisasm - -b32

00000000 E900000000 jmp dword 0x5

00000005 0A db 0x0a

# echo -e "\xea\x00\x00\x34\x12" | ndisasm -

00000000 EA00003412 jmp word 0x1234:0x0

00000005 0A db 0x0a

# echo -e "\xea\x00\x00\x00\x00\x34\x12" | ndisasm - -b 32

00000000 EA000000003412 jmp dword 0x1234:0x0

00000007 0A db 0x0a