binary-analysis-patterns

Assembly instruction patterns, control flow analysis, and decompilation techniques for understanding compiled binaries. Covers x86-64, ARM64, and ARM32 calling conventions with detailed instruction patterns for function prologues, epilogues, and parameter passing across System V and Microsoft x64 ABIs. Includes control flow patterns for conditionals, loops, and switch statements, plus data structure recognition for arrays, structs, and linked lists. Provides common code patterns for string operations, arithmetic optimizations, and bit manipulation with assembly examples. Offers Ghidra and IDA Pro integration guidance, including scripting examples for function signature recovery, structure creation, and pattern matching. Documents decompilation strategies for variable recovery, type inference, and algorithm identification with analysis workflow best practices.

INSTALLATION
npx skills add https://github.com/wshobson/agents --skill binary-analysis-patterns
Run in your project or agent environment. Adjust flags if your CLI version differs.

SKILL.md

Binary Analysis Patterns

Comprehensive patterns and techniques for analyzing compiled binaries, understanding assembly code, and reconstructing program logic.

Disassembly Fundamentals

x86-64 Instruction Patterns

#### Function Prologue/Epilogue

; Standard prologue

push rbp           ; Save base pointer

mov rbp, rsp       ; Set up stack frame

sub rsp, 0x20      ; Allocate local variables

; Leaf function (no calls)

; May skip frame pointer setup

sub rsp, 0x18      ; Just allocate locals

; Standard epilogue

mov rsp, rbp ; Restore stack pointer

pop rbp ; Restore base pointer

ret

; Leave instruction (equivalent)

leave ; mov rsp, rbp; pop rbp

ret

#### Calling Conventions

**System V AMD64 (Linux, macOS)**

; Arguments: RDI, RSI, RDX, RCX, R8, R9, then stack

; Return: RAX (and RDX for 128-bit)

; Caller-saved: RAX, RCX, RDX, RSI, RDI, R8-R11

; Callee-saved: RBX, RBP, R12-R15

; Example: func(a, b, c, d, e, f, g)

mov rdi, [a] ; 1st arg

mov rsi, [b] ; 2nd arg

mov rdx, [c] ; 3rd arg

mov rcx, [d] ; 4th arg

mov r8, [e] ; 5th arg

mov r9, [f] ; 6th arg

push qword [g] ; 7th arg on stack (memory operand needs an explicit size)

call func


**Microsoft x64 (Windows)**

; Arguments: RCX, RDX, R8, R9, then stack

; Shadow space: 32 bytes reserved on stack

; Return: RAX

; Example: func(a, b, c, d, e)

sub rsp, 0x28 ; 32-byte shadow space + 8-byte slot for 5th arg (keeps RSP 16-byte aligned)

mov rcx, [a] ; 1st arg

mov rdx, [b] ; 2nd arg

mov r8, [c] ; 3rd arg

mov r9, [d] ; 4th arg

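mov rax, [e] ; x86 has no memory-to-memory mov, so load into a scratch register first

mov [rsp+0x20], rax ; 5th arg on stack (first slot above the shadow space)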

call func

add rsp, 0x28


### ARM Assembly Patterns

#### ARM64 (AArch64) Calling Convention

; Arguments: X0-X7

; Return: X0 (and X1 for 128-bit)

; Frame pointer: X29

; Link register: X30

; Function prologue

stp x29, x30, [sp, #-16]! ; Save FP and LR

mov x29, sp ; Set frame pointer

; Function epilogue

ldp x29, x30, [sp], #16 ; Restore FP and LR

ret


#### ARM32 Calling Convention

; Arguments: R0-R3, then stack

; Return: R0 (and R1 for 64-bit)

; Link register: LR (R14)

; Function prologue

push {fp, lr}

add fp, sp, #4

; Function epilogue

pop {fp, pc} ; Return by popping PC


## Control Flow Patterns

### Conditional Branches

; if (a == b)

cmp eax, ebx

jne skip_block

; ... if body ...

skip_block:

; if (a < b) - signed

cmp eax, ebx

jge skip_block ; Jump if greater or equal

; ... if body ...

skip_block:

; if (a < b) - unsigned

cmp eax, ebx

jae skip_block ; Jump if above or equal

; ... if body ...

skip_block:


### Loop Patterns

; for (int i = 0; i < n; i++)

xor ecx, ecx ; i = 0

loop_start:

cmp ecx, [n] ; i < n

jge loop_end

; ... loop body ...

inc ecx ; i++

jmp loop_start

loop_end:

; while (condition)

jmp loop_check

loop_body:

; ... body ...

loop_check:

cmp eax, ebx

jl loop_body

; do-while

loop_body:

; ... body ...

cmp eax, ebx

jl loop_body


### Switch Statement Patterns

; Jump table pattern

mov eax, [switch_var]

cmp eax, max_case

ja default_case

jmp [jump_table + rax*8] ; 8-byte entries; the write to EAX already zero-extended into RAX

; Sequential comparison (small switch)

cmp eax, 1

je case_1

cmp eax, 2

je case_2

cmp eax, 3

je case_3

jmp default_case


## Data Structure Patterns

### Array Access

; array[i] - 4-byte elements

mov eax, [rbx + rcx*4] ; rbx=base, rcx=index

; array[i] - 8-byte elements

mov rax, [rbx + rcx*8]

; Multi-dimensional array[i][j]

; arr[i][j] = base + (i * cols + j) * element_size

imul eax, [cols] ; eax = i * cols (eax holds i on entry)

add eax, [j]

mov edx, [rbx + rax*4]


### Structure Access

struct Example {

int a; // offset 0

char b; // offset 4

// padding // offset 5-7

long c; // offset 8

short d; // offset 16

};

; Accessing struct fields

mov rdi, [struct_ptr]

mov eax, [rdi] ; s->a (offset 0)

movzx eax, byte [rdi+4] ; s->b (offset 4)

mov rax, [rdi+8] ; s->c (offset 8)

movzx eax, word [rdi+16] ; s->d (offset 16)


### Linked List Traversal

; while (node != NULL)

list_loop:

test rdi, rdi ; node == NULL?

jz list_done

; ... process node ...

mov rdi, [rdi+8] ; node = node->next (assuming next at offset 8)

jmp list_loop

list_done:


## Common Code Patterns

### String Operations

; strlen pattern

xor ecx, ecx

strlen_loop:

cmp byte [rdi + rcx], 0

je strlen_done

inc ecx

jmp strlen_loop

strlen_done:

; ecx contains length

; strcpy pattern

strcpy_loop:

mov al, [rsi]

mov [rdi], al

test al, al

jz strcpy_done

inc rsi

inc rdi

jmp strcpy_loop

strcpy_done:

; memcpy using rep movsb

mov rdi, dest

mov rsi, src

mov rcx, count

rep movsb


### Arithmetic Patterns

; Multiplication by constant

; x * 3

lea eax, [rax + rax*2]

; x * 5

lea eax, [rax + rax*4]

; x * 10

lea eax, [rax + rax*4] ; x * 5

add eax, eax ; * 2

; Division by power of 2 (signed)

mov eax, [x]

cdq ; Sign extend to EDX:EAX

and edx, 7 ; For divide by 8

add eax, edx ; Adjust for negative

sar eax, 3 ; Arithmetic shift right

; Modulo power of 2

and eax, 7 ; x % 8 (valid for unsigned or non-negative x only)


### Bit Manipulation

; Test specific bit

test eax, 0x80 ; Test bit 7

jnz bit_set

; Set bit

or eax, 0x10 ; Set bit 4

; Clear bit

and eax, ~0x10 ; Clear bit 4

; Toggle bit

xor eax, 0x10 ; Toggle bit 4

; Count leading zeros

bsr eax, ecx ; Bit scan reverse: index of highest set bit (result undefined if ecx == 0)

xor eax, 31 ; Convert to leading zeros

; Population count (popcnt)

popcnt eax, ecx ; Count set bits


## Decompilation Patterns

### Variable Recovery

; Local variable at rbp-8

mov qword [rbp-8], rax ; Store to local

mov rax, [rbp-8] ; Load from local

; Stack-allocated array

lea rax, [rbp-0x40] ; Array starts at rbp-0x40

mov [rax], edx ; array[0] = value

mov [rax+4], ecx ; array[1] = value


### Function Signature Recovery

; Identify parameters by register usage

func:

; rdi used as first param (System V)

mov [rbp-8], rdi ; Save param to local

; rsi used as second param

mov [rbp-16], rsi

; Identify return by RAX at end

mov rax, [result]

ret


### Type Recovery

; 1-byte operations suggest char/bool

movzx eax, byte [rdi] ; Zero-extend byte

movsx eax, byte [rdi] ; Sign-extend byte

; 2-byte operations suggest short

movzx eax, word [rdi]

movsx eax, word [rdi]

; 4-byte operations suggest int/float

mov eax, [rdi]

movss xmm0, [rdi] ; Float

; 8-byte operations suggest long/double/pointer

mov rax, [rdi]

movsd xmm0, [rdi] ; Double


## Ghidra Analysis Tips

### Improving Decompilation

// In Ghidra scripting

// Fix function signature

Function func = getFunctionAt(toAddr(0x401000));

func.setReturnType(IntegerDataType.dataType, SourceType.USER_DEFINED);

// Create structure type

StructureDataType struct = new StructureDataType("MyStruct", 0);

struct.add(IntegerDataType.dataType, "field_a", null);

struct.add(PointerDataType.dataType, "next", null);

// Apply to memory

createData(toAddr(0x601000), struct);


### Pattern Matching Scripts

# Find all calls to dangerous functions
#
# Ghidra script fragment: walks every function in the current program and,
# for those whose name is in DANGEROUS_FUNCTIONS, prints the source address
# of each reference to the function's entry point. No reference-type filter
# is applied, so every reference returned by getReferencesTo() is reported.
DANGEROUS_FUNCTIONS = ("strcpy", "sprintf", "gets")

for func in currentProgram.getFunctionManager().getFunctions(True):
    # Check the name first so references are only enumerated for matches.
    if func.getName() not in DANGEROUS_FUNCTIONS:
        continue
    for ref in getReferencesTo(func.getEntryPoint()):
        # str.format instead of an f-string: Ghidra's default Jython 2.7
        # interpreter rejects f-strings as a syntax error.
        print("Dangerous call at {}".format(ref.getFromAddress()))


## IDA Pro Patterns

### IDAPython Analysis

import idaapi

import idautils

import idc

# Find all function calls
def find_calls(func_name):
    """Print the address of every `call` instruction that targets func_name.

    Iterates over every function reported by idautils.Functions(), scans each
    instruction head up to the function end, and for instructions whose
    mnemonic is "call" compares the name of the first operand's target with
    func_name.

    Args:
        func_name: Name of the callee to search for.
    """
    for func_ea in idautils.Functions():
        for head in idautils.Heads(func_ea, idc.find_func_end(func_ea)):
            if idc.print_insn_mnem(head) != "call":
                continue
            # Operand 0 of a call instruction is the call target.
            target = idc.get_operand_value(head, 0)
            if idc.get_func_name(target) == func_name:
                print(f"Call to {func_name} at {hex(head)}")

# Rename functions based on strings
def auto_rename():
    """Skeleton: visit functions that reference a string and still have an
    auto-generated name.

    Walks every string from idautils.Strings(), follows each cross-reference
    to it, and selects the containing function when its name starts with
    "sub_". The renaming step itself is left as a placeholder.
    """
    for s in idautils.Strings():
        for xref in idautils.XrefsTo(s.ea):
            func = idaapi.get_func(xref.frm)
            if func is None:
                # The reference does not come from inside a known function.
                continue
            # Prefix test rather than substring test, so user-named functions
            # that merely contain "sub_" are not treated as unnamed.
            if idc.get_func_name(func.start_ea).startswith("sub_"):
                # Use string as hint for naming
                pass

BrowserAct

Let your agent run on any real-world website

Bypass CAPTCHA & anti-bot for free. Start local, scale to cloud.

Explore BrowserAct Skills →

Stop writing automation&scrapers

Install the CLI. Run your first Skill in 30 seconds. Scale when you're ready.

Start free
free · no credit card