#include "qemu/osdep.h" #include "sysemu/sysemu.h" #include "cpu.h" #include "qemu/main-loop.h" #include "exec/ram_addr.h" #include "qemu/rcu_queue.h" #include "migration/migration.h" #include "nyx/debug.h" #include "nyx/memory_access.h" #include "nyx/snapshot/memory/shadow_memory.h" #include "nyx/snapshot/helper.h" typedef struct fast_reload_dump_head_s{ uint32_t shadow_memory_regions; uint32_t ram_region_index; // remove } fast_reload_dump_head_t; typedef struct fast_reload_dump_entry_s{ uint64_t shadow_memory_offset; char idstr[256]; } fast_reload_dump_entry_t; static void shadow_memory_set_incremental_ptrs(shadow_memory_t* self){ for(uint8_t i = 0; i < self->ram_regions_num; i++){ self->ram_regions[i].incremental_region_ptr = self->incremental_ptr + self->ram_regions[i].offset; } } static void shadow_memory_pre_alloc_incremental(shadow_memory_t* self){ self->incremental_ptr = mmap(0, self->memory_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->snapshot_ptr_fd, 0); shadow_memory_set_incremental_ptrs(self); } static void shadow_memory_init_generic(shadow_memory_t* self){ self->root_track_pages_num = 0; self->root_track_pages_size = 32 << 10; self->root_track_pages_stack = malloc(sizeof(uint64_t)*self->root_track_pages_size); shadow_memory_pre_alloc_incremental(self); self->incremental_enabled = false; } shadow_memory_t* shadow_memory_init(void){ RAMBlock *block; RAMBlock* block_array[10]; void* snapshot_ptr_offset_array[10]; shadow_memory_t* self = malloc(sizeof(shadow_memory_t)); memset(self, 0x0, sizeof(shadow_memory_t)); QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { self->memory_size += block->used_length; } self->snapshot_ptr_fd = memfd_create("in_memory_root_snapshot", MFD_CLOEXEC | MFD_ALLOW_SEALING); assert(!ftruncate(self->snapshot_ptr_fd, self->memory_size)); fcntl(self->snapshot_ptr_fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL); //printf("MMAP -> 0x%lx\n", self->memory_size); self->snapshot_ptr = mmap(NULL, self->memory_size, PROT_READ | PROT_WRITE , MAP_SHARED , self->snapshot_ptr_fd, 0); madvise(self->snapshot_ptr, self->memory_size, MADV_RANDOM | MADV_MERGEABLE); QEMU_PT_PRINTF(RELOAD_PREFIX, "Allocating Memory (%p) Size: %lx", self->snapshot_ptr, self->memory_size); uint64_t offset = 0; uint8_t i = 0; uint8_t regions_num = 0; QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { QEMU_PT_PRINTF(RELOAD_PREFIX, "%lx %lx %lx\t%s\t%p", block->offset, block->used_length, block->max_length, block->idstr, block->host); //printf("%lx %lx %lx\t%s\t%p\n", block->offset, block->used_length, block->max_length, block->idstr, block->host); block_array[i] = block; memcpy(self->snapshot_ptr+offset, block->host, block->used_length); snapshot_ptr_offset_array[i++] = self->snapshot_ptr+offset; offset += block->used_length; regions_num++; } for(uint8_t i = 0; i < regions_num; i++){ block = block_array[i]; if(!block->mr->readonly){ if(self->ram_regions_num == 0 && block->used_length >= MEM_SPLIT_START){ self->ram_regions[self->ram_regions_num].ram_region = i; self->ram_regions[self->ram_regions_num].base = block->mr->addr; self->ram_regions[self->ram_regions_num].size = MEM_SPLIT_START; self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0]; self->ram_regions[self->ram_regions_num].host_region_ptr = block->host; self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr+self->ram_regions[self->ram_regions_num].offset; self->ram_regions[self->ram_regions_num].idstr = malloc(strlen(block->idstr) + 1); 

shadow_memory_t* shadow_memory_init(void) {
    RAMBlock* block;
    RAMBlock* block_array[10];
    void* snapshot_ptr_offset_array[10];

    shadow_memory_t* self = malloc(sizeof(shadow_memory_t));
    memset(self, 0x0, sizeof(shadow_memory_t));

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        self->memory_size += block->used_length;
    }

    /* back the root snapshot by a sealed in-memory file */
    self->snapshot_ptr_fd = memfd_create("in_memory_root_snapshot", MFD_CLOEXEC | MFD_ALLOW_SEALING);
    assert(!ftruncate(self->snapshot_ptr_fd, self->memory_size));
    fcntl(self->snapshot_ptr_fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);

    self->snapshot_ptr = mmap(NULL, self->memory_size, PROT_READ | PROT_WRITE, MAP_SHARED, self->snapshot_ptr_fd, 0);
    madvise(self->snapshot_ptr, self->memory_size, MADV_RANDOM | MADV_MERGEABLE);

    QEMU_PT_PRINTF(RELOAD_PREFIX, "Allocating Memory (%p) Size: %lx", self->snapshot_ptr, self->memory_size);

    /* copy all RAM blocks into the snapshot, back to back */
    uint64_t offset = 0;
    uint8_t i = 0;
    uint8_t regions_num = 0;
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        QEMU_PT_PRINTF(RELOAD_PREFIX, "%lx %lx %lx\t%s\t%p", block->offset, block->used_length, block->max_length, block->idstr, block->host);
        block_array[i] = block;
        memcpy(self->snapshot_ptr + offset, block->host, block->used_length);
        snapshot_ptr_offset_array[i++] = self->snapshot_ptr + offset;
        offset += block->used_length;
        regions_num++;
    }

    for (uint8_t i = 0; i < regions_num; i++) {
        block = block_array[i];
        if (!block->mr->readonly) {
            if (self->ram_regions_num == 0 && block->used_length >= MEM_SPLIT_START) {
                /* A large first RAM block is split at the guest memory hole:
                 * the second half resumes at guest-physical MEM_SPLIT_END
                 * (typically the area above 4 GiB), while its backing bytes
                 * stay contiguous in the snapshot. */
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = strdup(block->idstr);
                self->ram_regions_num++;

                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = MEM_SPLIT_END;
                self->ram_regions[self->ram_regions_num].size = block->used_length - MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = (snapshot_ptr_offset_array[i] + MEM_SPLIT_START) - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host + MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = snapshot_ptr_offset_array[i] + MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].idstr = strdup(block->idstr);
            }
            else {
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = block->used_length;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = strdup(block->idstr);
            }
            self->ram_regions_num++;
        }
    }

    shadow_memory_init_generic(self);
    return self;
}
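
/*
 * Rough usage sketch. The actual call sites live outside this file (in
 * Nyx's fast_reload code); the sequence below is illustrative only:
 *
 *     shadow_memory_t* sm = shadow_memory_init();   // snapshot current guest RAM
 *     shadow_memory_serialize(sm, "/tmp/snap");     // optionally persist to disk
 *     shadow_memory_switch_snapshot(sm, true);      // use the incremental view
 *     shadow_memory_restore_memory(sm);             // undo all tracked dirty pages
 */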

shadow_memory_t* shadow_memory_init_from_snapshot(const char* snapshot_folder, bool pre_snapshot) {
    RAMBlock* block;
    RAMBlock* block_array[10];
    void* snapshot_ptr_offset_array[10];

    shadow_memory_t* self = malloc(sizeof(shadow_memory_t));
    memset(self, 0x0, sizeof(shadow_memory_t));

    /* count total memory size */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        self->memory_size += block->used_length;
    }

    /* count number of ram regions */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (!block->mr->readonly) {
            if (self->ram_regions_num == 0 && block->used_length >= MEM_SPLIT_START) {
                self->ram_regions_num++;
            }
            self->ram_regions_num++;
        }
    }

    char* path_meta;
    char* path_dump;
    assert(asprintf(&path_meta, "%s/fast_snapshot.mem_meta", snapshot_folder) != -1);
    assert(asprintf(&path_dump, "%s/fast_snapshot.mem_dump", snapshot_folder) != -1);

    fast_reload_dump_head_t head;
    FILE* file_mem_meta = fopen(path_meta, "r");
    assert(file_mem_meta != NULL);
    assert(fread(&head, sizeof(fast_reload_dump_head_t), 1, file_mem_meta) == 1);
    fclose(file_mem_meta);

    if (self->ram_regions_num != head.shadow_memory_regions) {
        fprintf(stderr, "Error: self->ram_regions_num (%d) != head.shadow_memory_regions (%d)\n", self->ram_regions_num, head.shadow_memory_regions);
        exit(1);
    }

    FILE* file_mem_dump = fopen(path_dump, "r");
    assert(file_mem_dump != NULL);
    fseek(file_mem_dump, 0L, SEEK_END);
    uint64_t file_mem_dump_size = ftell(file_mem_dump);
    debug_fprintf(stderr, "guest_ram_size == ftell(f) => 0x%lx vs 0x%lx (%s)\n", self->memory_size, file_mem_dump_size, path_dump);

#define VGA_SIZE (16 << 20) /* VGA memory included in the dump */
    if (self->memory_size != file_mem_dump_size) {
        if (file_mem_dump_size >= VGA_SIZE) {
            fprintf(stderr, "ERROR: guest size should be %ld MB - currently set to %ld MB\n", (file_mem_dump_size - VGA_SIZE) >> 20, (self->memory_size - VGA_SIZE) >> 20);
            exit(1);
        }
        else {
            fprintf(stderr, "ERROR: guest size: %ld bytes\n", file_mem_dump_size);
            exit(1);
        }
    }

    assert(self->memory_size == ftell(file_mem_dump));
    fseek(file_mem_dump, 0L, SEEK_SET);
    fclose(file_mem_dump);

    /* map the memory dump read-only as the root snapshot */
    self->snapshot_ptr_fd = open(path_dump, O_RDONLY);
    self->snapshot_ptr = mmap(0, self->memory_size, PROT_READ, MAP_SHARED, self->snapshot_ptr_fd, 0);
    assert(self->snapshot_ptr != (void*)-1);
    madvise(self->snapshot_ptr, self->memory_size, MADV_MERGEABLE);

    uint64_t offset = 0;
    uint8_t i = 0;
    uint8_t regions_num = 0;
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        QEMU_PT_PRINTF(RELOAD_PREFIX, "%lx %lx %lx\t%s\t%p", block->offset, block->used_length, block->max_length, block->idstr, block->host);
        block_array[i] = block;
        snapshot_ptr_offset_array[i++] = self->snapshot_ptr + offset;
        offset += block->used_length;
        regions_num++;
    }

    self->ram_regions_num = 0;
    for (uint8_t i = 0; i < regions_num; i++) {
        block = block_array[i];
        if (!block->mr->readonly) {
            if (self->ram_regions_num == 0 && block->used_length >= MEM_SPLIT_START) {
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = strdup(block->idstr);
                self->ram_regions_num++;

                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = MEM_SPLIT_END;
                self->ram_regions[self->ram_regions_num].size = block->used_length - MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = (snapshot_ptr_offset_array[i] + MEM_SPLIT_START) - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host + MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = snapshot_ptr_offset_array[i] + MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].idstr = strdup(block->idstr);
            }
            else {
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = block->used_length;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = strdup(block->idstr);
            }
            self->ram_regions_num++;
        }
    }

    /* memcpy version */
    /*
    for (uint8_t i = 0; i < self->ram_regions_num; i++) {
        void* host_addr = self->ram_regions[i].host_region_ptr + 0;
        void* snapshot_addr = self->ram_regions[i].snapshot_region_ptr + 0;
        memcpy(host_addr, snapshot_addr, self->ram_regions[i].size);
    }
    */

    /* munmap + mmap version: instead of copying the snapshot into guest RAM,
     * remap guest RAM as a private (copy-on-write) view of the dump file -
     * every page is then faulted in lazily from the snapshot. */
    for (uint8_t i = 0; i < self->ram_regions_num; i++) {
        void* host_addr = self->ram_regions[i].host_region_ptr + 0;
        assert(munmap(host_addr, self->ram_regions[i].size) == 0); /* munmap returns 0 or -1, never EINVAL */
        assert(mmap(host_addr, self->ram_regions[i].size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_FIXED, self->snapshot_ptr_fd, self->ram_regions[i].offset) != MAP_FAILED);
    }

    shadow_memory_init_generic(self);
    return self;
}

/* Every RESTORE_RATE invocations, throw away the (by now heavily
 * CoW-populated) incremental mapping and create a fresh one. */
void shadow_memory_prepare_incremental(shadow_memory_t* self) {
    static int count = 0;
    if (count >= RESTORE_RATE) {
        count = 0;
        munmap(self->incremental_ptr, self->memory_size);
        self->incremental_ptr = mmap(0, self->memory_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->snapshot_ptr_fd, 0);
        shadow_memory_set_incremental_ptrs(self);
    }
    count++;
}

void shadow_memory_switch_snapshot(shadow_memory_t* self, bool incremental) {
    self->incremental_enabled = incremental;
}
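
/*
 * Entries on root_track_pages_stack pack a page-aligned, region-relative
 * offset and a ram_regions[] index into a single uint64_t: bits 63..12 hold
 * the offset, bits 11..0 the slot. For example, page offset 0x1a2000 in
 * region 3 is stored as 0x1a2003.
 */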

void shadow_memory_restore_memory(shadow_memory_t* self) {
    rcu_read_lock();
    uint8_t slot = 0;
    uint64_t addr = 0;
    for (uint64_t i = 0; i < self->root_track_pages_num; i++) {
        addr = self->root_track_pages_stack[i] & 0xFFFFFFFFFFFFF000;
        slot = self->root_track_pages_stack[i] & 0xFFF;
        /* restore the dirty page in guest RAM and drop the CoW copy in the
         * incremental view */
        memcpy(self->ram_regions[slot].host_region_ptr + addr, self->ram_regions[slot].snapshot_region_ptr + addr, TARGET_PAGE_SIZE);
        memcpy(self->ram_regions[slot].incremental_region_ptr + addr, self->ram_regions[slot].snapshot_region_ptr + addr, TARGET_PAGE_SIZE);
    }
    self->root_track_pages_num = 0;
    rcu_read_unlock();
}

/* only used in debug mode -> no need to be fast */
bool shadow_memory_is_root_page_tracked(shadow_memory_t* self, uint64_t address, uint8_t slot) {
    uint64_t value = (address & 0xFFFFFFFFFFFFF000) | slot;
    for (uint64_t i = 0; i < self->root_track_pages_num; i++) {
        if (self->root_track_pages_stack[i] == value) {
            return true;
        }
    }
    return false;
}

void shadow_memory_serialize(shadow_memory_t* self, const char* snapshot_folder) {
    char* tmp1;
    char* tmp2;
    assert(asprintf(&tmp1, "%s/fast_snapshot.mem_meta", snapshot_folder) != -1);
    assert(asprintf(&tmp2, "%s/fast_snapshot.mem_dump", snapshot_folder) != -1);

    FILE* file_mem_meta = fopen(tmp1, "w+b");
    FILE* file_mem_data = fopen(tmp2, "w+b");

    fast_reload_dump_head_t head;
    fast_reload_dump_entry_t entry;

    head.shadow_memory_regions = self->ram_regions_num;
    head.ram_region_index = 0; /* due to legacy reasons */
    fwrite(&head, sizeof(fast_reload_dump_head_t), 1, file_mem_meta);

    for (uint64_t i = 0; i < self->ram_regions_num; i++) {
        memset(&entry, 0x0, sizeof(fast_reload_dump_entry_t));
        entry.shadow_memory_offset = (uint64_t)self->ram_regions[i].offset;
        strncpy((char*)&entry.idstr, (const char*)self->ram_regions[i].idstr, 255);
        fwrite(&entry, sizeof(fast_reload_dump_entry_t), 1, file_mem_meta);
    }

    fwrite(self->snapshot_ptr, self->memory_size, 1, file_mem_data);

    fclose(file_mem_meta);
    fclose(file_mem_data);
}
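
/*
 * On-disk layout produced above:
 *   fast_snapshot.mem_meta - one fast_reload_dump_head_t, followed by one
 *                            fast_reload_dump_entry_t per RAM region
 *   fast_snapshot.mem_dump - the raw snapshot memory, with each region at
 *                            the shadow_memory_offset recorded in its entry
 */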

static bool shadow_memory_read_page_frame(shadow_memory_t* self, uint64_t address, void* ptr, uint16_t offset, uint16_t size) {
    assert((offset + size) <= 0x1000);
    for (uint8_t i = 0; i < self->ram_regions_num; i++) {
        if (address >= self->ram_regions[i].base && address < (self->ram_regions[i].base + self->ram_regions[i].size)) {
            void* snapshot_ptr = self->ram_regions[i].snapshot_region_ptr + (address - self->ram_regions[i].base);
            memcpy(ptr, snapshot_ptr + offset, size);
            return true;
        }
    }
    return false;
}

bool shadow_memory_read_physical_memory(shadow_memory_t* self, uint64_t address, void* ptr, size_t size) {
    size_t bytes_left = size;
    size_t copy_bytes = 0;
    uint64_t current_address = address;
    uint64_t offset = 0;

    while (bytes_left != 0) {
        /* full page */
        if ((current_address & 0xFFF) == 0) {
            copy_bytes = 0x1000;
        }
        /* partial page (starting at an offset) */
        else {
            copy_bytes = 0x1000 - (current_address & 0xFFF);
        }

        /* partial page */
        if (bytes_left < copy_bytes) {
            copy_bytes = bytes_left;
        }

        if (shadow_memory_read_page_frame(self, current_address & ~0xFFFULL, ptr + offset, current_address & 0xFFFULL, copy_bytes) == false) {
            return false;
        }

        current_address += copy_bytes;
        offset += copy_bytes;
        bytes_left = bytes_left - copy_bytes;
    }
    return true;
}