一些shell技巧
xxd
命令行工具,作用是把二进制文件变成文本可读的文件
xxd是二进制转文本,xxd -r是把转换过的文本再转换回二进制
vim调用外部程序
使用命令%!,比如在vim中调用xxd,是使用%!xxd
实现读取可执行文件的信息
readelf
binutils工具集
nm命令可以显示一个可执行文件中的符号
addr2line可以将一个代码的地址转换为行号
设定从一个特定的内存地址开始执行代码
将一个内存地址指针赋值给函数指针void (*指针名)(),然后调用这个函数指针即可
什么是加载器
将程序运行需要的内容搬到内存里
给正确的权限
配置初始状态
// Originally generated by GPT-4; modified only to check the result of lseek().
//
// Naive flat-binary loader: maps the file named by argv[1] into memory with
// read/write/execute permission and calls its first byte as a function.
// Returns 1 on a usage error or when open/lseek/mmap fails, 0 if the called
// code returns normally.
int main(int argc, char *argv[]) {
    if (argc != 2) {
        printf("Usage: %s <binary_file>\n", argv[0]);
        return 1;
    }

    // Open the binary file
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    // Get the file size. lseek() returns -1 on failure; without this check
    // that value would be converted to an enormous mmap() length.
    off_t file_size = lseek(fd, 0, SEEK_END);
    if (file_size < 0) {
        perror("lseek");
        close(fd);
        return 1;
    }
    lseek(fd, 0, SEEK_SET);

    // Map the whole file; the mapping is private, so writes never reach the file
    void *mem = mmap(NULL, file_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
    if (mem == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return 1;
    }

    // Close the file
    close(fd);

    // Cast the memory address to a function pointer and call it
    void (*binary_func)() = (void (*)())mem;
    binary_func();

    // Clean up
    munmap(mem, file_size);
    return 0;
}
静态链接
找到文件,前4kb映射到内存里
里面有
header解析
header命令行
readelf命令可以实现这个功能
- -h读的是header
- -a读所有的
-
检查执行的环境对不对,比如CPU架构是不是吻合
加载的时候需要将程序中标记了LOAD的位置搬到内存中程序声明的相应的位置即可
- 计算开始位置,结束位置,权限等
- 然后将文件映射到上述的空间中
初始化栈
-
- 在程序内存中直接定义一个静态的stack
// Minimal user-space loader for an ET_EXEC x86-64 ELF file.
//
// Steps, in order:
//   1. map the first 4096 bytes of the file and validate the ELF header;
//   2. map every PT_LOAD segment at the address the file requests;
//   3. build an initial stack (argc, argv[], envp[], auxv[]) in a static buffer;
//   4. switch %rsp to that stack and jump to the entry point.
//
// file: path of the executable to load.
// argv: NULL-terminated argument vector handed to the loaded program.
// envp: NULL-terminated environment; entries containing '_' are dropped.
//
// Does not return when loading succeeds (control is transferred with `jmp`).
// Every failure is an assert(). ROUND, push and STK_SZ are defined outside
// this block; ROUND(x, a) is presumably "round x down to a multiple of a" and
// push(sp, T, v) presumably stores v as a T at sp and advances sp -- confirm
// against their definitions.
void execve_(const char *file, char *argv[], char *envp[]) {
    // WARNING: This execve_ does not free process resources.
    // **NOT** all process states are properly initialized.
    int fd = open(file, O_RDONLY);
    assert(fd >= 0);  // 0 is a valid descriptor; only negative values mean failure

    // Map ELF header to memory
    Elf64_Ehdr *h = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
    assert(h != MAP_FAILED);
    assert(h->e_type == ET_EXEC && h->e_machine == EM_X86_64);

    // Program header table; it is read through the 4096-byte mapping above,
    // so it is assumed to fit inside that window.
    Elf64_Phdr *pht = (Elf64_Phdr *)((char *)h + h->e_phoff);
    for (int i = 0; i < h->e_phnum; i++) {
        Elf64_Phdr *p = &pht[i];
        if (p->p_type == PT_LOAD) {
            // Memory map region
            uintptr_t map_beg = ROUND(p->p_vaddr, p->p_align);
            uintptr_t map_end = map_beg + p->p_memsz;
            // Rounded-up end of the segment; computed for reference only,
            // nothing below reads it.
            while (map_end % p->p_align != 0) map_end++;

            // Memory map flags
            int prot = 0;
            if (p->p_flags & PF_R) prot |= PROT_READ;
            if (p->p_flags & PF_W) prot |= PROT_WRITE;
            if (p->p_flags & PF_X) prot |= PROT_EXEC;

            // Memory map size: file bytes plus the slack in front of p_vaddr,
            // rounded up to the alignment. size_t (not int) so a large
            // p_filesz is not truncated.
            size_t map_sz = p->p_filesz + (p->p_vaddr % p->p_align);
            while (map_sz % p->p_align != 0) map_sz++;

            // Map file contents to memory
            void *ret = mmap(
                (void *)map_beg,                 // addr, rounded to ALIGN
                map_sz,                          // length
                prot,                            // protection
                MAP_PRIVATE | MAP_FIXED,         // flags, private & strict
                fd,                              // file descriptor
                ROUND(p->p_offset, p->p_align)   // offset
            );
            assert(ret != MAP_FAILED);

            // Map extra anonymous memory (e.g., bss)
            intptr_t extra_sz = p->p_memsz - p->p_filesz;
            if (extra_sz > 0) {
                uintptr_t extra_beg = map_beg + map_sz;
                ret = mmap(
                    (void *)extra_beg, extra_sz, prot,        // addr, length, protection
                    MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED,  // flags
                    -1, 0                                     // no file
                );
                assert(ret != MAP_FAILED);
            }
        }
    }
    close(fd);

    // The initial stack lives in a static buffer; it is filled upwards,
    // starting 4096 bytes below the end of the buffer.
    static char stack[STK_SZ], rnd[16];
    void *sp = (void *)ROUND(stack + sizeof(stack) - 4096, 16);
    void *sp_exec = sp;  // value loaded into %rsp; points at argc
    int argc = 0;

    // argc
    while (argv[argc]) argc++;
    push(sp, intptr_t, argc);

    // argv[], NULL-terminate (i <= argc also copies the terminating NULL)
    for (int i = 0; i <= argc; i++)
        push(sp, intptr_t, argv[i]);

    // envp[], NULL-terminate
    for (; *envp; envp++) {
        if (!strchr(*envp, '_')) // remove some verbose ones
            push(sp, intptr_t, *envp);
    }
    push(sp, intptr_t, 0);  // the zero word that ends envp[]

    // auxv[], AT_NULL-terminate
    push(sp, Elf64_auxv_t, { .a_type = AT_RANDOM, .a_un.a_val = (uintptr_t)rnd } );
    push(sp, Elf64_auxv_t, { .a_type = AT_NULL } );

    // Switch to the prepared stack and enter the program.
    asm volatile(
        "mov $0, %%rdx;" // required by ABI
        "mov %0, %%rsp;"
        "jmp *%1" : : "a"(sp_exec), "b"(h->e_entry));
}
// Entry point of the loader demo: loads argv[1] via execve_() and passes the
// remaining arguments plus the (filtered) environment to it.
int main(int argc, char *argv[], char *envp[]) {
    if (argc >= 2) {
        // argv + 1 makes the loaded program see its own path as argv[0].
        execve_(argv[1], argv + 1, envp);
        return 0;
    }

    fprintf(stderr, "Usage: %s file [args...]\n", argv[0]);
    exit(1);
}
-
当一个程序中引用了声明过但是尚未定义的函数的时候,原始程序
拆解应用程序的需求
将运行库和程序代码分离
静态链接的程序很大,动态链接较小
方便通过升级库的方式来实现升级,而不需要一旦升级就整个编译所有程序
同时还可以实现部分程序的重新编译,其他程序与之链接即可,不需要重新编译
类似地,遇到调用声明过但是没有定义的函数的时候,运行时会存在一个位置需要将其地址修改为函数定义的位置
编译器将一个现在未知的地址翻译为一个动态的位置
DSYM是动态的位置
前半部分是需要加载的库和符号
- DSYM需要一张表,从名字映射到地址
dl.h
// Header found at the very start of a .dl file.
struct dl_hdr {
    char magic[4];      // file signature; dlopen() compares it with DL_MAGIC
    uint32_t file_sz;   // total size in bytes; used as the mmap() length
    uint32_t code_off;  // offset from the file start where the code begins
};
// One record of a .dl symbol table. The three fields add up to REC_SZ bytes
// (REC_SZ is defined outside this block). A record whose type is 0 ends the
// table.
struct symbol {
    // For '#' records: offset of the symbol from the start of the file.
    // For '?' records: overwritten by dlopen() with the resolved address.
    int64_t offset;
    // Record kind: '+' library to load, '?' external symbol to resolve,
    // '#' symbol defined (exported) by this file.
    char type;
    // NUL-terminated symbol or library name.
    char name[REC_SZ - sizeof(int64_t) - 1];
};
dlbox.c
// Handle for a loaded .dl file. dlopen() casts the start of the mmap'ed file
// to this type, so `hdr` aliases the on-disk header, while `symtab` and `path`
// are written by dlopen() into the bytes that follow it inside the (private)
// mapping. Field order and sizes therefore must not change.
struct dlib {
struct dl_hdr hdr;
// Symbol table; dlopen() points it REC_SZ bytes past the start of the mapping.
struct symbol *symtab; // borrowed spaces from header
// Path string passed to dlopen(); the pointer is stored, not copied.
const char *path;
};
static struct dlib *dlopen(const char *path);
// Like dlopen(), but never returns NULL: when the file cannot be loaded it
// prints a diagnostic to stderr and terminates the process with status 1.
struct dlib *dlopen_chk(const char *path) {
    struct dlib *handle = dlopen(path);
    if (handle) {
        return handle;
    }

    fprintf(stderr, "Not a valid dlib file: %s.\n", path);
    exit(1);
}
// Implementation of binutils
// "gcc" sub-command: assembles <stem>.S with gcc and extracts the .text
// section of the resulting object into <stem>.dl with objcopy, where <stem>
// is `path` up to (not including) its last '.'.
//
// path: file name with an extension; a path without any '.' is ignored.
//
// The caller's string is left untouched (it is declared const), and the shell
// command is built with a bounded snprintf(): a path too long for the command
// buffer is reported on stderr and skipped instead of overflowing the buffer.
// NOTE: the path is interpolated into a shell command line unescaped, so it
// must come from a trusted source.
void dl_gcc(const char *path) {
    const char *dot = strrchr(path, '.');
    if (!dot) {
        return;
    }

    int stem_len = (int)(dot - path);  // precision argument for "%.*s"
    char buf[256];
    int n = snprintf(buf, sizeof buf,
                     "gcc -m64 -fPIC -c %.*s.S && "
                     "objcopy -S -j .text -O binary %.*s.o %.*s.dl",
                     stem_len, path, stem_len, path, stem_len, path);
    if (n < 0 || (size_t)n >= sizeof buf) {
        fprintf(stderr, "dl_gcc: path too long: %s\n", path);
        return;
    }

    system(buf);
}
// "readdl" sub-command: prints the symbol table of the .dl file at `path`.
// Exits via dlopen_chk() if the file cannot be loaded.
void dl_readdl(const char *path) {
    struct dlib *lib = dlopen_chk(path);

    printf("DLIB file %s:\n\n", lib->path);

    // The table ends at the first record whose type byte is 0.
    struct symbol *rec = lib->symtab;
    while (rec->type) {
        if (rec->type == '+') {
            printf(" LOAD %s\n", rec->name);
        } else if (rec->type == '?') {
            printf(" EXTERN %s\n", rec->name);
        } else if (rec->type == '#') {
            printf( "%08lx %s\n", rec->offset, rec->name);
        }
        rec++;
    }
}
// "objdump" sub-command: disassembles the code of the .dl file at `path`.
// The bytes from hdr.code_off up to hdr.file_sz are streamed into `ndisasm`;
// whenever a byte sits at a symbol's offset, the current ndisasm process is
// closed, a "<name>:" heading is printed, and a new ndisasm is started with
// its origin set to that symbol's offset within the code.
// Bytes that precede the first matching symbol are not disassembled.
void dl_objdump(const char *path) {
    struct dlib *lib = dlopen_chk(path);
    char *base = (char *)lib;
    char cmdline[64];
    FILE *disasm = NULL;

    printf("Disassembly of binary %s:\n", lib->path);

    char *cur = base + lib->hdr.code_off;
    while (cur < base + lib->hdr.file_sz) {
        for (struct symbol *rec = lib->symtab; rec->type; rec++) {
            if (base + rec->offset != cur) {
                continue;
            }

            // Offset of this symbol relative to the start of the code.
            int rel = cur - base - lib->hdr.code_off;
            if (disasm) {
                pclose(disasm);
            }
            sprintf(cmdline, "ndisasm - -b 64 -o 0x%08x\n", rel);
            disasm = popen(cmdline, "w");
            printf("\n%016x <%s>:\n", rel, rec->name);
            // Flush so the heading is written before ndisasm's own output.
            fflush(stdout);
        }

        if (disasm) {
            fputc(*cur, disasm);
        }
        cur++;
    }

    if (disasm) {
        pclose(disasm);
    }
}
// binutils: interpreter
// "interp" sub-command: loads the .dl file at `path`, looks up the symbol
// named "main", calls it, and exits with its return value. If several records
// are named "main" the last one is used; if there is none the function simply
// returns.
void dl_interp(const char *path) {
    struct dlib *lib = dlopen_chk(path);
    int (*start)() = NULL;

    struct symbol *rec = lib->symtab;
    while (rec->type) {
        if (!strcmp(rec->name, "main")) {
            start = (void *)((char *)lib + rec->offset);
        }
        rec++;
    }

    if (!start) {
        return;
    }
    exit(start());
}
// One sub-command of the dlbox tool: its name and the function run per file.
struct cmd {
    const char *cmd;
    void (*handler)(const char *path);
};

// Sub-command table; the entry with a NULL handler marks the end.
struct cmd commands[] = {
    { .cmd = "gcc",     .handler = dl_gcc },
    { .cmd = "readdl",  .handler = dl_readdl },
    { .cmd = "objdump", .handler = dl_objdump },
    { .cmd = "interp",  .handler = dl_interp },
    { .cmd = "",        .handler = NULL },
};
// dlbox entry point: `argv[1]` selects a sub-command from `commands`, which is
// then run once for every file named in argv[2..], with a blank line printed
// between files.
// Returns 1 on a usage error or an unknown sub-command (previously an unknown
// sub-command did nothing and reported success), 0 otherwise.
int main(int argc, char *argv[]) {
    if (argc < 3) {
        fprintf(stderr, "Usage: %s {gcc|readdl|objdump|interp} FILE...\n", argv[0]);
        return 1;
    }

    for (struct cmd *cmd = &commands[0]; cmd->handler; cmd++) {
        if (strcmp(argv[1], cmd->cmd) != 0) {
            continue;
        }
        for (char **path = &argv[2]; *path; path++) {
            if (path != argv + 2) printf("\n");
            cmd->handler(*path);
        }
        return 0;
    }

    fprintf(stderr, "Unknown command: %s\n", argv[1]);
    fprintf(stderr, "Usage: %s {gcc|readdl|objdump|interp} FILE...\n", argv[0]);
    return 1;
}
// Implementation of dlopen()
// Loader state shared by dlopen() and its helpers.
// libs: '+' records of the libraries loaded so far (a NULL slot is free).
static struct symbol *libs[16];
// syms: exported symbols by name; an entry with an empty name is free.
static struct symbol syms[128];

static void *dlsym(const char *name);
static void dlexport(const char *name, void *addr);
static void dlload(struct symbol *sym);
// Loads the .dl file at `path` and processes its symbol table.
//
// The header is read and its magic checked, then the whole file (hdr.file_sz
// bytes) is mapped privately with read/write/execute permission. The start of
// the mapping is used as the struct dlib handle; symtab and path are stored
// inside it. Each symbol record is then handled by type:
//   '+'  load the named library (dlload),
//   '?'  replace the record's offset with the address found by dlsym,
//   '#'  export the symbol's load-time address (dlexport).
//
// Returns the handle, or NULL if the file cannot be opened, is shorter than a
// header, has the wrong magic, or cannot be mapped. `path` is stored by
// pointer, so it must outlive the handle.
static struct dlib *dlopen(const char *path) {
    struct dl_hdr hdr;
    struct dlib *h;

    int fd = open(path, O_RDONLY);
    if (fd < 0) goto bad;

    // read() returns -1 on error; compare as a signed value so that an error
    // is not mistaken for a huge byte count.
    if (read(fd, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) goto bad;
    if (strncmp(hdr.magic, DL_MAGIC, strlen(DL_MAGIC)) != 0) goto bad;

    h = mmap(NULL, hdr.file_sz, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
    if (h == MAP_FAILED) goto bad;

    // The mapping stays valid after the descriptor is closed; closing here
    // avoids leaking one descriptor per loaded library.
    close(fd);

    h->symtab = (struct symbol *)((char *)h + REC_SZ);
    h->path = path;

    for (struct symbol *sym = h->symtab; sym->type; sym++) {
        switch (sym->type) {
            case '+': dlload(sym); break; // (recursively) load
            case '?': sym->offset = (uintptr_t)dlsym(sym->name); break; // resolve
            case '#': dlexport(sym->name, (char *)h + sym->offset); break; // export
        }
    }
    return h;

bad:
    if (fd >= 0) close(fd);  // 0 is a valid descriptor too
    return NULL;
}
// Looks up an exported symbol by name in `syms` and returns the address that
// dlexport() recorded for it.
// An unknown name trips assert(0). The trailing return keeps the function
// well-defined when assertions are compiled out (NDEBUG); in that case NULL
// is returned.
// LENGTH is defined outside this block (presumably the array element count).
static void *dlsym(const char *name) {
    for (int i = 0; i < LENGTH(syms); i++) {
        if (strcmp(syms[i].name, name) == 0) {
            return (void *)syms[i].offset;
        }
    }
    assert(0);
    return NULL;
}
// Records `name` -> `addr` in the first free entry of `syms` (an entry is free
// while its name is empty). A full table trips assert(0).
static void dlexport(const char *name, void *addr) {
    int slot = 0;
    while (slot < LENGTH(syms) && syms[slot].name[0]) {
        slot++;
    }

    if (!(slot < LENGTH(syms))) {
        assert(0);
        return;
    }

    syms[slot].offset = (uintptr_t)addr; // load-time offset
    strcpy(syms[slot].name, name);
}
// Loads the library named by the '+' record `sym`, unless a record with the
// same name is already present in `libs`.
// The record is stored in `libs` before dlopen() runs, so a library that is
// requested again while it is still being loaded is not loaded twice.
// A dependency that fails to load is now reported on stderr (it used to be
// ignored silently); loading continues either way.
// A full `libs` table trips assert(0).
static void dlload(struct symbol *sym) {
    for (int i = 0; i < LENGTH(libs); i++) {
        if (libs[i] && strcmp(libs[i]->name, sym->name) == 0) {
            return; // already loaded
        }
        if (!libs[i]) {
            libs[i] = sym;
            if (!dlopen(sym->name)) { // load recursively
                fprintf(stderr, "dlload: cannot load dependency: %s\n", sym->name);
            }
            return;
        }
    }
    assert(0);
}
多次依赖的库只会加载一次
位置无关代码
将定位从绝对位置更改为相对pc指针的位置,防止不同模块重叠地址
每一段代码除了代码和数据之外还有一个TABLE
引用外部函数的时候,先访问TABLE上对应的位置,再从此处查到真正的位置
- C语言代码的角度无法区分这两者的区别
- 外部库的调用没有内部那么频繁
- 实际上只有涉及外部调用的时候才会查表,程序执行的都是CALL
- 以上是真正查表的位置
虚拟地址空间映射
- 操作系统对于只读的代码,只分配一次内存,不会因为创建了多个副本就映射多次浪费内存
fork()时对于虚拟地址的复制
- 一个内存只要不写,就可以不复制
- 只读的内存是不复制的