QEMU-Nyx-fork/nyx/kvm_nested.c
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include <linux/kvm.h>
#include "exec/ram_addr.h"
#include "qemu/rcu_queue.h"
#include "qemu-common.h"
#include "cpu.h"
#include "nyx/debug.h"
#include "nyx/kvm_nested.h"
#include "nyx/state/state.h"
#include "pt.h"
#define PPAGE_SIZE 0x1000
#define PENTRIES 0x200
#define PLEVEL_4_SHIFT 12
#define PLEVEL_3_SHIFT 21
#define PLEVEL_2_SHIFT 30
#define PLEVEL_1_SHIFT 39
/* PML4 indices at or above 0x100 belong to the upper canonical half */
#define SIGN_EXTEND_THRESHOLD 0x100
#define SIGN_EXTEND 0xFFFF000000000000ULL
#define PAGETABLE_MASK 0xFFFFFFFFFF000ULL
#define CHECK_BIT(var, pos) !!(((var) & (1ULL << (pos))))
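
/*
 * Note on naming: "level 1" is the top of the 48-bit, 4-level walk
 * (PML4, VA bits 47:39) and "level 4" the bottom (PT, VA bits 20:12).
 * The permission bits tested via CHECK_BIT() below are the architectural
 * page-table-entry bits: bit 0 = present, bit 1 = writable,
 * bit 2 = user-accessible, bit 63 = execute-disable (NX).
 */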
struct vmcs_hdr {
uint32_t revision_id : 31;
uint32_t shadow_vmcs : 1;
};
struct __attribute__((__packed__)) vmcs12 {
/* According to the Intel spec, a VMCS region must start with the
* following two fields. Then follow implementation-specific data.
*/
struct vmcs_hdr hdr;
uint32_t abort;
uint32_t launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
uint32_t padding[7]; /* room for future expansion */
uint64_t io_bitmap_a;
uint64_t io_bitmap_b;
uint64_t msr_bitmap;
uint64_t vm_exit_msr_store_addr;
uint64_t vm_exit_msr_load_addr;
uint64_t vm_entry_msr_load_addr;
uint64_t tsc_offset;
uint64_t virtual_apic_page_addr;
uint64_t apic_access_addr;
uint64_t posted_intr_desc_addr;
uint64_t ept_pointer;
uint64_t eoi_exit_bitmap0;
uint64_t eoi_exit_bitmap1;
uint64_t eoi_exit_bitmap2;
uint64_t eoi_exit_bitmap3;
uint64_t xss_exit_bitmap;
uint64_t guest_physical_address;
uint64_t vmcs_link_pointer;
uint64_t guest_ia32_debugctl;
uint64_t guest_ia32_pat;
uint64_t guest_ia32_efer;
uint64_t guest_ia32_perf_global_ctrl;
uint64_t guest_pdptr0;
uint64_t guest_pdptr1;
uint64_t guest_pdptr2;
uint64_t guest_pdptr3;
uint64_t guest_bndcfgs;
uint64_t host_ia32_pat;
uint64_t host_ia32_efer;
uint64_t host_ia32_perf_global_ctrl;
uint64_t vmread_bitmap;
uint64_t vmwrite_bitmap;
uint64_t vm_function_control;
uint64_t eptp_list_address;
uint64_t pml_address;
uint64_t padding64[3]; /* room for future expansion */
    /*
     * To allow migration of L1 (complete with its L2 guests) between
     * machines of different natural widths (32 or 64 bit), we cannot have
     * unsigned long fields with no explicit size. We use uint64_t
     * instead. Luckily, x86 is little-endian.
     */
uint64_t cr0_guest_host_mask;
uint64_t cr4_guest_host_mask;
uint64_t cr0_read_shadow;
uint64_t cr4_read_shadow;
uint64_t cr3_target_value0;
uint64_t cr3_target_value1;
uint64_t cr3_target_value2;
uint64_t cr3_target_value3;
uint64_t exit_qualification;
uint64_t guest_linear_address;
uint64_t guest_cr0;
uint64_t guest_cr3;
uint64_t guest_cr4;
uint64_t guest_es_base;
uint64_t guest_cs_base;
uint64_t guest_ss_base;
uint64_t guest_ds_base;
uint64_t guest_fs_base;
uint64_t guest_gs_base;
uint64_t guest_ldtr_base;
uint64_t guest_tr_base;
uint64_t guest_gdtr_base;
uint64_t guest_idtr_base;
uint64_t guest_dr7;
uint64_t guest_rsp;
uint64_t guest_rip;
uint64_t guest_rflags;
uint64_t guest_pending_dbg_exceptions;
uint64_t guest_sysenter_esp;
uint64_t guest_sysenter_eip;
uint64_t host_cr0;
uint64_t host_cr3;
uint64_t host_cr4;
uint64_t host_fs_base;
uint64_t host_gs_base;
uint64_t host_tr_base;
uint64_t host_gdtr_base;
uint64_t host_idtr_base;
uint64_t host_ia32_sysenter_esp;
uint64_t host_ia32_sysenter_eip;
uint64_t host_rsp;
uint64_t host_rip;
uint64_t paddingl[8]; /* room for future expansion */
uint32_t pin_based_vm_exec_control;
uint32_t cpu_based_vm_exec_control;
uint32_t exception_bitmap;
uint32_t page_fault_error_code_mask;
uint32_t page_fault_error_code_match;
uint32_t cr3_target_count;
uint32_t vm_exit_controls;
uint32_t vm_exit_msr_store_count;
uint32_t vm_exit_msr_load_count;
uint32_t vm_entry_controls;
uint32_t vm_entry_msr_load_count;
uint32_t vm_entry_intr_info_field;
uint32_t vm_entry_exception_error_code;
uint32_t vm_entry_instruction_len;
uint32_t tpr_threshold;
uint32_t secondary_vm_exec_control;
uint32_t vm_instruction_error;
uint32_t vm_exit_reason;
uint32_t vm_exit_intr_info;
uint32_t vm_exit_intr_error_code;
uint32_t idt_vectoring_info_field;
uint32_t idt_vectoring_error_code;
uint32_t vm_exit_instruction_len;
uint32_t vmx_instruction_info;
uint32_t guest_es_limit;
uint32_t guest_cs_limit;
uint32_t guest_ss_limit;
uint32_t guest_ds_limit;
uint32_t guest_fs_limit;
uint32_t guest_gs_limit;
uint32_t guest_ldtr_limit;
uint32_t guest_tr_limit;
uint32_t guest_gdtr_limit;
uint32_t guest_idtr_limit;
uint32_t guest_es_ar_bytes;
uint32_t guest_cs_ar_bytes;
uint32_t guest_ss_ar_bytes;
uint32_t guest_ds_ar_bytes;
uint32_t guest_fs_ar_bytes;
uint32_t guest_gs_ar_bytes;
uint32_t guest_ldtr_ar_bytes;
uint32_t guest_tr_ar_bytes;
uint32_t guest_interruptibility_info;
uint32_t guest_activity_state;
uint32_t guest_sysenter_cs;
uint32_t host_ia32_sysenter_cs;
uint32_t vmx_preemption_timer_value;
uint32_t padding32[7]; /* room for future expansion */
uint16_t virtual_processor_id;
uint16_t posted_intr_nv;
uint16_t guest_es_selector;
uint16_t guest_cs_selector;
uint16_t guest_ss_selector;
uint16_t guest_ds_selector;
uint16_t guest_fs_selector;
uint16_t guest_gs_selector;
uint16_t guest_ldtr_selector;
uint16_t guest_tr_selector;
uint16_t guest_intr_status;
uint16_t host_es_selector;
uint16_t host_cs_selector;
uint16_t host_ss_selector;
uint16_t host_ds_selector;
uint16_t host_fs_selector;
uint16_t host_gs_selector;
uint16_t host_tr_selector;
uint16_t guest_pml_index;
};
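
/*
 * Compile-time sanity check (a sketch, not in the original source): the
 * casts below reinterpret KVM's nested-state data blob as a vmcs12. KVM
 * reserves 0x1000 bytes for the cached VMCS (VMCS12_SIZE), so the struct
 * must fit within that region.
 */
_Static_assert(sizeof(struct vmcs12) <= 0x1000,
               "struct vmcs12 must fit into KVM's 4 KiB VMCS region");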
static void write_address(uint64_t address, uint64_t size, uint64_t prot)
{
static uint64_t next_address = PAGETABLE_MASK;
static uint64_t last_address = 0x0;
static uint64_t last_prot = 0;
if (address != next_address || prot != last_prot) {
/* do not print guard pages or empty pages without any permissions */
if (last_address && (CHECK_BIT(last_prot, 1) || !CHECK_BIT(last_prot, 63))) {
            /* writable and executable at once: flag the W^X mapping */
            if (CHECK_BIT(last_prot, 1) && !CHECK_BIT(last_prot, 63)) {
nyx_debug_p(NESTED_VM_PREFIX, "%016lx - %016lx %c%c%c [WARNING]\n",
last_address, next_address,
CHECK_BIT(last_prot, 1) ? 'W' : '-',
CHECK_BIT(last_prot, 2) ? 'U' : 'K',
!CHECK_BIT(last_prot, 63) ? 'X' : '-');
} else {
nyx_debug_p(NESTED_VM_PREFIX, "%016lx - %016lx %c%c%c\n", last_address,
next_address, CHECK_BIT(last_prot, 1) ? 'W' : '-',
CHECK_BIT(last_prot, 2) ? 'U' : 'K',
!CHECK_BIT(last_prot, 63) ? 'X' : '-');
}
}
last_address = address;
}
next_address = address + size;
last_prot = prot;
}
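
/*
 * Usage sketch (illustrative only): write_address() coalesces contiguous
 * pages with identical permissions and only prints once a range breaks:
 *
 *     write_address(0x7f0000000000, 0x1000, prot);  // opens a range
 *     write_address(0x7f0000001000, 0x1000, prot);  // extends it silently
 *     write_address(0, 0x1000, 0);                  // flush: prints the range
 *
 * print_48_paging() below follows exactly this pattern, including the
 * final flush call at its end.
 */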
void print_48_paging(uint64_t cr3)
{
uint64_t paging_entries_level_1[PENTRIES];
uint64_t paging_entries_level_2[PENTRIES];
uint64_t paging_entries_level_3[PENTRIES];
uint64_t paging_entries_level_4[PENTRIES];
uint64_t address_identifier_1, address_identifier_2, address_identifier_3,
address_identifier_4;
uint32_t i1, i2, i3, i4;
cpu_physical_memory_rw((cr3 & PAGETABLE_MASK), (uint8_t *)paging_entries_level_1,
PPAGE_SIZE, false);
    for (i1 = 0; i1 < PENTRIES; i1++) {
if (paging_entries_level_1[i1]) {
address_identifier_1 = ((uint64_t)i1) << PLEVEL_1_SHIFT;
            if (i1 & SIGN_EXTEND_THRESHOLD) {
address_identifier_1 |= SIGN_EXTEND;
}
if (CHECK_BIT(paging_entries_level_1[i1], 0))
{ /* otherwise swapped out */
cpu_physical_memory_rw((paging_entries_level_1[i1] & PAGETABLE_MASK),
(uint8_t *)paging_entries_level_2, PPAGE_SIZE,
false);
for (i2 = 0; i2 < PENTRIES; i2++) {
if (paging_entries_level_2[i2]) {
address_identifier_2 = (((uint64_t)i2) << PLEVEL_2_SHIFT) +
address_identifier_1;
if (CHECK_BIT(paging_entries_level_2[i2], 0))
{ /* otherwise swapped out */
if ((paging_entries_level_2[i2] & PAGETABLE_MASK) ==
(paging_entries_level_1[i1] & PAGETABLE_MASK))
{
/* loop */
continue;
}
                            if (CHECK_BIT(paging_entries_level_2[i2], 7)) {
                                /* PS bit set: this PDPTE maps a 1 GiB page */
                                write_address(address_identifier_2, 0x40000000,
                                              (uint64_t)paging_entries_level_2[i2] &
                                                  ((1ULL << 63) | (1ULL << 2) |
                                                   (1ULL << 1)));
                            } else {
                                /* otherwise the PDPTE references a page directory */
                                cpu_physical_memory_rw((paging_entries_level_2[i2] &
                                                        PAGETABLE_MASK),
                                                       (uint8_t *)paging_entries_level_3,
                                                       PPAGE_SIZE, false);
for (i3 = 0; i3 < PENTRIES; i3++) {
if (paging_entries_level_3[i3]) {
address_identifier_3 =
(((uint64_t)i3) << PLEVEL_3_SHIFT) +
address_identifier_2;
if (CHECK_BIT(paging_entries_level_3[i3], 0))
{ /* otherwise swapped out */
if (CHECK_BIT(paging_entries_level_3[i3],
7))
                                            {
                                                /* PS bit set: this PDE maps a 2 MiB page */
write_address(
address_identifier_3, 0x200000,
(uint64_t)paging_entries_level_3[i3] &
((1ULL << 63) | (1ULL << 2) |
(1ULL << 1)));
} else {
cpu_physical_memory_rw(
(paging_entries_level_3[i3] &
PAGETABLE_MASK),
(uint8_t *)paging_entries_level_4,
PPAGE_SIZE, false);
for (i4 = 0; i4 < PENTRIES; i4++) {
if (paging_entries_level_4[i4]) {
address_identifier_4 =
(((uint64_t)i4)
<< PLEVEL_4_SHIFT) +
address_identifier_3;
if (CHECK_BIT(
paging_entries_level_4[i4],
0))
{
write_address(
address_identifier_4,
0x1000,
(uint64_t)paging_entries_level_4
[i4] &
((1ULL << 63) |
(1ULL << 2) |
(1ULL << 1)));
}
}
}
}
}
}
}
}
}
}
}
}
}
}
    write_address(0, 0x1000, 0); /* flush the final pending range */
}
uint64_t get_nested_guest_rip(CPUState *cpu)
{
X86CPU *cpux86 = X86_CPU(cpu);
CPUX86State *env = &cpux86->env;
kvm_vcpu_ioctl(cpu, KVM_GET_NESTED_STATE, env->nested_state);
struct vmcs12 *saved_vmcs = (struct vmcs12 *)&(env->nested_state->data);
return saved_vmcs->guest_rip;
}
uint64_t get_nested_host_rip(CPUState *cpu)
{
X86CPU *cpux86 = X86_CPU(cpu);
CPUX86State *env = &cpux86->env;
kvm_vcpu_ioctl(cpu, KVM_GET_NESTED_STATE, env->nested_state);
struct vmcs12 *saved_vmcs = (struct vmcs12 *)&(env->nested_state->data);
return saved_vmcs->host_rip;
}
uint64_t get_nested_host_cr3(CPUState *cpu)
{
X86CPU *cpux86 = X86_CPU(cpu);
CPUX86State *env = &cpux86->env;
kvm_vcpu_ioctl(cpu, KVM_GET_NESTED_STATE, env->nested_state);
struct vmcs12 *saved_vmcs = (struct vmcs12 *)&(env->nested_state->data);
return saved_vmcs->host_cr3;
}
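
/*
 * Sketch (not part of the original API): combining the accessors above
 * with the page-table walker, the L1 hypervisor's address space can be
 * dumped from its saved host CR3. Assumes a nested guest is active so
 * that KVM_GET_NESTED_STATE returns a valid vmcs12.
 */
static inline void dump_nested_host_address_space(CPUState *cpu)
{
    print_48_paging(get_nested_host_cr3(cpu));
}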
void set_nested_rip(CPUState *cpu, uint64_t rip)
{
X86CPU *cpux86 = X86_CPU(cpu);
CPUX86State *env = &cpux86->env;
struct vmcs12 *saved_vmcs = (struct vmcs12 *)&(env->nested_state->data);
saved_vmcs->guest_rip = rip;
}
void kvm_nested_get_info(CPUState *cpu)
{
X86CPU *cpux86 = X86_CPU(cpu);
CPUX86State *env = &cpux86->env;
kvm_vcpu_ioctl(cpu, KVM_GET_NESTED_STATE, env->nested_state);
__attribute__((unused)) struct vmcs12 *saved_vmcs =
(struct vmcs12 *)&(env->nested_state->data);
nyx_debug_p(NESTED_VM_PREFIX, "VMCS host_cr3:\t%lx\n", saved_vmcs->host_cr3);
nyx_debug_p(NESTED_VM_PREFIX, "VMCS host_cr4:\t%lx\n", saved_vmcs->host_cr4);
nyx_debug_p(NESTED_VM_PREFIX, "VMCS host_ia32_efer:\t%lx\n",
saved_vmcs->host_ia32_efer);
nyx_debug_p(NESTED_VM_PREFIX, "VMCS host_cr0:\t%lx\n", saved_vmcs->host_cr0);
}
#define AREA_DESC_LEN 256
#define MAGIC_NUMBER 0x41584548U
typedef struct {
uint32_t base;
uint32_t size;
uint32_t virtual_base;
char desc[AREA_DESC_LEN];
} area_t_export_t;
typedef struct {
uint32_t magic;
uint8_t num_mmio_areas;
uint8_t num_io_areas;
uint8_t num_alloc_areas;
uint8_t padding;
} config_t;
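
/*
 * Layout of the configuration blob: a config_t header immediately
 * followed by a flat array of area_t_export_t entries, MMIO areas first,
 * then IO areas (and presumably the alloc areas after those). A
 * well-formed blob therefore satisfies:
 *
 *     size == sizeof(config_t) + n_areas * sizeof(area_t_export_t)
 *
 * the divisibility part of which is what the first assert in
 * print_configuration() below checks.
 */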
void print_configuration(FILE *stream, void *configuration, size_t size)
{
    config_t        *config = configuration;
    area_t_export_t *areas =
        (area_t_export_t *)((uint8_t *)configuration + sizeof(config_t));

    fprintf(stream, "%s: size: %zx\n", __func__, size);
    assert((size - sizeof(config_t)) % sizeof(area_t_export_t) == 0);
    assert(config->magic == MAGIC_NUMBER);

    fprintf(stream, "%s: num_mmio_areas: %x\n", __func__, config->num_mmio_areas);
    fprintf(stream, "%s: num_io_areas: %x\n", __func__, config->num_io_areas);
    fprintf(stream, "%s: num_alloc_areas: %x\n", __func__,
            config->num_alloc_areas);

    for (int i = 0; i < config->num_mmio_areas; i++) {
        fprintf(stream, "\t-> MMIO: 0x%x (V: 0x%x) [0x%x]\t%s\n", areas[i].base,
                areas[i].virtual_base, areas[i].size, areas[i].desc);
    }
    for (int i = config->num_mmio_areas;
         i < (config->num_mmio_areas + config->num_io_areas); i++)
    {
        fprintf(stream, "\t-> IO: 0x%x [0x%x]\t%s\n", areas[i].base,
                areas[i].size, areas[i].desc);
    }
}
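
/*
 * Usage sketch (hypothetical buffer): given a configuration blob of
 * blob_size bytes received from the guest agent, validate and dump it:
 *
 *     print_configuration(stderr, blob, blob_size);
 */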