QEMU-Nyx-fork/nyx/snapshot/memory/shadow_memory.c
#include "qemu/osdep.h"
#include "sysemu/sysemu.h"
#include "cpu.h"
#include "qemu/main-loop.h"
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "migration/migration.h"
#include "nyx/debug.h"
#include "nyx/memory_access.h"
#include "nyx/snapshot/memory/shadow_memory.h"
#include "nyx/snapshot/helper.h"

typedef struct fast_reload_dump_head_s{
    uint32_t shadow_memory_regions;
    uint32_t ram_region_index; /* unused; kept only for file-format compatibility */
} fast_reload_dump_head_t;

typedef struct fast_reload_dump_entry_s{
    uint64_t shadow_memory_offset;
    char idstr[256];
} fast_reload_dump_entry_t;
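
/*
 * On-disk layout produced by shadow_memory_serialize() and consumed by
 * shadow_memory_init_from_snapshot(): fast_snapshot.mem_meta starts with one
 * fast_reload_dump_head_t followed by one fast_reload_dump_entry_t per RAM
 * region; fast_snapshot.mem_dump is the raw concatenation of all RAMBlock
 * contents at the recorded offsets.
 */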

static void shadow_memory_set_incremental_ptrs(shadow_memory_t* self){
    for(uint8_t i = 0; i < self->ram_regions_num; i++){
        self->ram_regions[i].incremental_region_ptr = self->incremental_ptr + self->ram_regions[i].offset;
    }
}
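
/*
 * Map a private copy-on-write view of the root snapshot memfd; incremental
 * snapshot state is staged in this mapping so the root snapshot itself is
 * never modified.
 */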
static void shadow_memory_pre_alloc_incremental(shadow_memory_t* self){
    self->incremental_ptr = mmap(0, self->memory_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->snapshot_ptr_fd, 0);
    shadow_memory_set_incremental_ptrs(self);
}
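
/* Shared tail of both constructors: allocate the root page tracking stack and
 * pre-map the incremental copy-on-write view. */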
static void shadow_memory_init_generic(shadow_memory_t* self){
    self->root_track_pages_num = 0;
    self->root_track_pages_size = 32 << 10;
    self->root_track_pages_stack = malloc(sizeof(uint64_t) * self->root_track_pages_size);

    shadow_memory_pre_alloc_incremental(self);
    self->incremental_enabled = false;
}
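
/*
 * Build the root snapshot from the running guest: copy every RAMBlock into a
 * sealed memfd and record, for each writable block, where its data lives in
 * that file. A first block spanning MEM_SPLIT_START is split into a low and a
 * high region (based at MEM_SPLIT_END), presumably to skip the MMIO hole
 * between the two boundaries.
 */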
shadow_memory_t* shadow_memory_init(void){
    RAMBlock* block;
    RAMBlock* block_array[10];
    void* snapshot_ptr_offset_array[10];

    shadow_memory_t* self = malloc(sizeof(shadow_memory_t));
    memset(self, 0x0, sizeof(shadow_memory_t));

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        self->memory_size += block->used_length;
    }

    self->snapshot_ptr_fd = memfd_create("in_memory_root_snapshot", MFD_CLOEXEC | MFD_ALLOW_SEALING);
    assert(!ftruncate(self->snapshot_ptr_fd, self->memory_size));
    fcntl(self->snapshot_ptr_fd, F_ADD_SEALS, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL);

    //printf("MMAP -> 0x%lx\n", self->memory_size);
    self->snapshot_ptr = mmap(NULL, self->memory_size, PROT_READ | PROT_WRITE, MAP_SHARED, self->snapshot_ptr_fd, 0);
    madvise(self->snapshot_ptr, self->memory_size, MADV_RANDOM | MADV_MERGEABLE);

    QEMU_PT_PRINTF(RELOAD_PREFIX, "Allocating Memory (%p) Size: %lx", self->snapshot_ptr, self->memory_size);

    uint64_t offset = 0;
    uint8_t i = 0;
    uint8_t regions_num = 0;
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        QEMU_PT_PRINTF(RELOAD_PREFIX, "%lx %lx %lx\t%s\t%p", block->offset, block->used_length, block->max_length, block->idstr, block->host);
        //printf("%lx %lx %lx\t%s\t%p\n", block->offset, block->used_length, block->max_length, block->idstr, block->host);
        block_array[i] = block;
        memcpy(self->snapshot_ptr + offset, block->host, block->used_length);
        snapshot_ptr_offset_array[i++] = self->snapshot_ptr + offset;
        offset += block->used_length;
        regions_num++;
    }

    for(uint8_t i = 0; i < regions_num; i++){
        block = block_array[i];
        if(!block->mr->readonly){
            if(self->ram_regions_num == 0 && block->used_length >= MEM_SPLIT_START){
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = malloc(strlen(block->idstr) + 1);
                memset(self->ram_regions[self->ram_regions_num].idstr, 0, strlen(block->idstr) + 1);
                strcpy(self->ram_regions[self->ram_regions_num].idstr, block->idstr);
                self->ram_regions_num++;

                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = MEM_SPLIT_END;
                self->ram_regions[self->ram_regions_num].size = block->used_length - MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = (snapshot_ptr_offset_array[i] + MEM_SPLIT_START) - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host + MEM_SPLIT_START;
                //self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->ptr+self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = snapshot_ptr_offset_array[i] + MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].idstr = malloc(strlen(block->idstr) + 1);
                memset(self->ram_regions[self->ram_regions_num].idstr, 0, strlen(block->idstr) + 1);
                strcpy(self->ram_regions[self->ram_regions_num].idstr, block->idstr);
            }
            else{
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = block->used_length;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = malloc(strlen(block->idstr) + 1);
                memset(self->ram_regions[self->ram_regions_num].idstr, 0, strlen(block->idstr) + 1);
                strcpy(self->ram_regions[self->ram_regions_num].idstr, block->idstr);
            }
            self->ram_regions_num++;
        }
    }

    shadow_memory_init_generic(self);
    return self;
}
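
/*
 * Re-create the shadow memory state from a serialized snapshot folder
 * (fast_snapshot.mem_meta / fast_snapshot.mem_dump). Instead of copying the
 * dump into guest RAM, the dump file is mapped MAP_PRIVATE | MAP_FIXED over
 * the guest's RAMBlocks.
 */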
shadow_memory_t* shadow_memory_init_from_snapshot(const char* snapshot_folder, bool pre_snapshot){
    RAMBlock* block;
    RAMBlock* block_array[10];
    void* snapshot_ptr_offset_array[10];

    shadow_memory_t* self = malloc(sizeof(shadow_memory_t));
    memset(self, 0x0, sizeof(shadow_memory_t));

    /* count total memory size */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        self->memory_size += block->used_length;
    }

    /* count number of ram regions */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if(!block->mr->readonly){
            if(self->ram_regions_num == 0 && block->used_length >= MEM_SPLIT_START){
                self->ram_regions_num++;
            }
            self->ram_regions_num++;
        }
    }

    char* path_meta;
    char* path_dump;
    assert(asprintf(&path_meta, "%s/fast_snapshot.mem_meta", snapshot_folder) != -1);
    assert(asprintf(&path_dump, "%s/fast_snapshot.mem_dump", snapshot_folder) != -1);

    fast_reload_dump_head_t head;
    FILE* file_mem_meta = fopen(path_meta, "r");
    assert(file_mem_meta != NULL);
    assert(fread(&head, sizeof(fast_reload_dump_head_t), 1, file_mem_meta) == 1);
    fclose(file_mem_meta);

    if(self->ram_regions_num != head.shadow_memory_regions){
        fprintf(stderr, "Error: self->ram_regions_num (%d) != head.shadow_memory_regions (%d)\n", self->ram_regions_num, head.shadow_memory_regions);
        exit(1);
    }

    //printf("LOAD -> self->ram_regions_num: %d\n", self->ram_regions_num);

    FILE* file_mem_dump = fopen(path_dump, "r");
    assert(file_mem_dump != NULL);
    fseek(file_mem_dump, 0L, SEEK_END);
    uint64_t file_mem_dump_size = ftell(file_mem_dump);
    debug_fprintf(stderr, "guest_ram_size == ftell(f) => 0x%lx vs 0x%lx (%s)\n", self->memory_size, file_mem_dump_size, path_dump);

#define VGA_SIZE (16<<20)
    if(self->memory_size != file_mem_dump_size){
        if(file_mem_dump_size >= VGA_SIZE){
            fprintf(stderr, "ERROR: guest size should be %ld MB - it is currently set to %ld MB\n", (file_mem_dump_size-VGA_SIZE)>>20, (self->memory_size-VGA_SIZE)>>20);
            exit(1);
        }
        else{
            fprintf(stderr, "ERROR: guest size: %ld bytes\n", file_mem_dump_size);
            exit(1);
        }
    }
    assert(self->memory_size == ftell(file_mem_dump));
    fseek(file_mem_dump, 0L, SEEK_SET);
    fclose(file_mem_dump);
    self->snapshot_ptr_fd = open(path_dump, O_RDONLY);
    //printf("self->snapshot_ptr_fd: %d\n", self->snapshot_ptr_fd);
    self->snapshot_ptr = mmap(0, self->memory_size, PROT_READ, MAP_SHARED, self->snapshot_ptr_fd, 0);
    //printf("TRY TO MMAP : %p\n", self->snapshot_ptr);
    assert(self->snapshot_ptr != (void*)-1);
    madvise(self->snapshot_ptr, self->memory_size, MADV_MERGEABLE);

    uint64_t offset = 0;
    uint8_t i = 0;
    uint8_t regions_num = 0;
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        QEMU_PT_PRINTF(RELOAD_PREFIX, "%lx %lx %lx\t%s\t%p", block->offset, block->used_length, block->max_length, block->idstr, block->host);
        //printf("%lx %lx %lx\t%s\t%p\n", block->offset, block->used_length, block->max_length, block->idstr, block->host);
        block_array[i] = block;
        snapshot_ptr_offset_array[i++] = self->snapshot_ptr + offset;
        offset += block->used_length;
        regions_num++;
    }

    self->ram_regions_num = 0;
    for(uint8_t i = 0; i < regions_num; i++){
        block = block_array[i];
        if(!block->mr->readonly){
            if(self->ram_regions_num == 0 && block->used_length >= MEM_SPLIT_START){
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = malloc(strlen(block->idstr) + 1);
                memset(self->ram_regions[self->ram_regions_num].idstr, 0, strlen(block->idstr) + 1);
                strcpy(self->ram_regions[self->ram_regions_num].idstr, block->idstr);
                self->ram_regions_num++;

                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = MEM_SPLIT_END;
                self->ram_regions[self->ram_regions_num].size = block->used_length - MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].offset = (snapshot_ptr_offset_array[i] + MEM_SPLIT_START) - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host + MEM_SPLIT_START;
                //self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->ptr+self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = snapshot_ptr_offset_array[i] + MEM_SPLIT_START;
                self->ram_regions[self->ram_regions_num].idstr = malloc(strlen(block->idstr) + 1);
                memset(self->ram_regions[self->ram_regions_num].idstr, 0, strlen(block->idstr) + 1);
                strcpy(self->ram_regions[self->ram_regions_num].idstr, block->idstr);
            }
            else{
                self->ram_regions[self->ram_regions_num].ram_region = i;
                self->ram_regions[self->ram_regions_num].base = block->mr->addr;
                self->ram_regions[self->ram_regions_num].size = block->used_length;
                self->ram_regions[self->ram_regions_num].offset = snapshot_ptr_offset_array[i] - snapshot_ptr_offset_array[0];
                self->ram_regions[self->ram_regions_num].host_region_ptr = block->host;
                self->ram_regions[self->ram_regions_num].snapshot_region_ptr = self->snapshot_ptr + self->ram_regions[self->ram_regions_num].offset;
                self->ram_regions[self->ram_regions_num].idstr = malloc(strlen(block->idstr) + 1);
                memset(self->ram_regions[self->ram_regions_num].idstr, 0, strlen(block->idstr) + 1);
                strcpy(self->ram_regions[self->ram_regions_num].idstr, block->idstr);
            }
            self->ram_regions_num++;
        }
    }

    /* memcpy version */
    /*
    for(uint8_t i = 0; i < self->ram_regions_num; i++){
        void* host_addr = self->ram_regions[i].host_region_ptr + 0;
        void* snapshot_addr = self->ram_regions[i].snapshot_region_ptr + 0;
        memcpy(host_addr, snapshot_addr, self->ram_regions[i].size);
    }
    */

    /* munmap + mmap version */
    for(uint8_t i = 0; i < self->ram_regions_num; i++){
        void* host_addr = self->ram_regions[i].host_region_ptr + 0;
        /* munmap() returns 0 on success and -1 on error, so check for success
         * directly instead of comparing the return value against EINVAL */
        assert(munmap(host_addr, self->ram_regions[i].size) == 0);
        assert(mmap(host_addr, self->ram_regions[i].size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_FIXED, self->snapshot_ptr_fd, self->ram_regions[i].offset) != MAP_FAILED);
    }

    shadow_memory_init_generic(self);
    return self;
}
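
/*
 * Every RESTORE_RATE invocations, drop and re-create the private incremental
 * mapping so that pages dirtied by earlier incremental snapshots fall back to
 * the root snapshot content.
 */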
void shadow_memory_prepare_incremental(shadow_memory_t* self){
    static int count = 0;

    if(count >= RESTORE_RATE){
        count = 0;
        munmap(self->incremental_ptr, self->memory_size);
        self->incremental_ptr = mmap(0, self->memory_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->snapshot_ptr_fd, 0);
        shadow_memory_set_incremental_ptrs(self);
    }
    count++;
}
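
/* Enable or disable the incremental snapshot; the flag is presumably consulted
 * by the restore handlers outside this file. */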
void shadow_memory_switch_snapshot(shadow_memory_t* self, bool incremental){
    self->incremental_enabled = incremental;
}
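
/*
 * Restore every page recorded on the root_track_pages stack from the root
 * snapshot. Each stack entry packs a page-aligned address in the upper bits
 * and the RAM region slot in the low 12 bits (see
 * shadow_memory_is_root_page_tracked below).
 */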
void shadow_memory_restore_memory(shadow_memory_t* self){
    rcu_read_lock();

    uint8_t slot = 0;
    uint64_t addr = 0;
    for(uint64_t i = 0; i < self->root_track_pages_num; i++){
        addr = self->root_track_pages_stack[i] & 0xFFFFFFFFFFFFF000;
        slot = self->root_track_pages_stack[i] & 0xFFF;
        memcpy(self->ram_regions[slot].host_region_ptr + addr, self->ram_regions[slot].snapshot_region_ptr + addr, TARGET_PAGE_SIZE);
        memcpy(self->ram_regions[slot].incremental_region_ptr + addr, self->ram_regions[slot].snapshot_region_ptr + addr, TARGET_PAGE_SIZE);
    }
    self->root_track_pages_num = 0;

    rcu_read_unlock();
}

/* only used in debug mode -> no need to be fast */
bool shadow_memory_is_root_page_tracked(shadow_memory_t* self, uint64_t address, uint8_t slot){
    uint64_t value = (address & 0xFFFFFFFFFFFFF000) | slot;
    for(uint64_t i = 0; i < self->root_track_pages_num; i++){
        if(self->root_track_pages_stack[i] == value){
            return true;
        }
    }
    return false;
}
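
/*
 * Serialize the root snapshot: write the fast_reload_dump_head_t plus one
 * fast_reload_dump_entry_t per region to fast_snapshot.mem_meta and the raw
 * snapshot buffer to fast_snapshot.mem_dump.
 */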
void shadow_memory_serialize(shadow_memory_t* self, const char* snapshot_folder){
    char* tmp1;
    char* tmp2;
    assert(asprintf(&tmp1, "%s/fast_snapshot.mem_meta", snapshot_folder) != -1);
    assert(asprintf(&tmp2, "%s/fast_snapshot.mem_dump", snapshot_folder) != -1);

    FILE* file_mem_meta = fopen(tmp1, "w+b");
    FILE* file_mem_data = fopen(tmp2, "w+b");

    //} FILE* file_ptr_meta, FILE* file_ptr_data){
    //assert(self);
    //assert(file_ptr_meta);
    //assert(file_ptr_data);

    /*
    debug_printf("black_list_pages_num: %lx\n", self->black_list_pages_num);
    debug_printf("black_list_pages_size: %lx\n", self->black_list_pages_size);
    debug_printf("black_list_pages ...\n");
    for (uint64_t i = 0; i < self->black_list_pages_num; i++ ){
        debug_printf("self->black_list_pages[%ld] = %lx\n", i, self->black_list_pages[i]);
    }
    */

    //printf("shadow_memory_regions: %d\n", self->ram_regions_num);
    //debug_printf("ram_region_index: %d\n", self->ram_region_index);

    /*
    for (uint32_t i = 0; i < self->ram_regions_num; i++){
        printf("self->shadow_memory[%d] = %lx %s\n", i, self->ram_regions[i].base, self->ram_regions[i].idstr);
    }
    printf("ram_size: %lx\n", self->memory_size);
    */

    fast_reload_dump_head_t head;
    fast_reload_dump_entry_t entry;

    head.shadow_memory_regions = self->ram_regions_num;
    head.ram_region_index = 0; /* due to legacy reasons */
    fwrite(&head, sizeof(fast_reload_dump_head_t), 1, file_mem_meta);

    for (uint64_t i = 0; i < self->ram_regions_num; i++){
        memset(&entry, 0x0, sizeof(fast_reload_dump_entry_t));
        entry.shadow_memory_offset = (uint64_t)self->ram_regions[i].offset;
        strncpy((char*)&entry.idstr, (const char*)self->ram_regions[i].idstr, 255);
        fwrite(&entry, sizeof(fast_reload_dump_entry_t), 1, file_mem_meta);
    }

    fwrite(self->snapshot_ptr, self->memory_size, 1, file_mem_data);

    fclose(file_mem_meta);
    fclose(file_mem_data);
}
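
/*
 * Copy up to one page frame from the root snapshot into ptr. Returns false if
 * the given guest-physical address is not backed by any tracked RAM region.
 */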
static bool shadow_memory_read_page_frame(shadow_memory_t* self, uint64_t address, void* ptr, uint16_t offset, uint16_t size){
    assert((offset + size) <= 0x1000);
    for(uint8_t i = 0; i < self->ram_regions_num; i++){
        if(address >= self->ram_regions[i].base && address < (self->ram_regions[i].base + self->ram_regions[i].size)){
            void* snapshot_ptr = self->ram_regions[i].snapshot_region_ptr + (address - self->ram_regions[i].base);
            memcpy(ptr, snapshot_ptr + offset, size);
            return true;
        }
    }
    return false;
}
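
/*
 * Read an arbitrarily sized, possibly page-crossing range of guest-physical
 * memory from the root snapshot, one page frame at a time. Minimal usage
 * sketch (buffer and address are made up for illustration):
 *
 *     uint8_t buf[0x2000];
 *     if (!shadow_memory_read_physical_memory(self, 0x1000ULL, buf, sizeof(buf))) {
 *         // requested range is not covered by any RAM region
 *     }
 */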
bool shadow_memory_read_physical_memory(shadow_memory_t* self, uint64_t address, void* ptr, size_t size){
    size_t bytes_left = size;
    size_t copy_bytes = 0;
    uint64_t current_address = address;
    uint64_t offset = 0;

    while (bytes_left != 0) {
        /* full page */
        if ((current_address & 0xFFF) == 0){
            copy_bytes = 0x1000;
        }
        /* partial page (starting at an offset) */
        else {
            copy_bytes = 0x1000 - (current_address & 0xFFF);
        }

        /* partial page */
        if (bytes_left < copy_bytes){
            copy_bytes = bytes_left;
        }

        if (shadow_memory_read_page_frame(self, current_address & ~0xFFFULL, ptr + offset, current_address & 0xFFFULL, copy_bytes) == false){
            return false;
        }

        current_address += copy_bytes;
        offset += copy_bytes;
        bytes_left = bytes_left - copy_bytes;
    }
    return true;
}