Merge remote-tracking branch 'bonzini/iommu-for-anthony' into staging

# By Paolo Bonzini (10) and others
# Via Paolo Bonzini
* bonzini/iommu-for-anthony:
  exec: remove qemu_safe_ram_ptr
  icount: make it thread-safe
  icount: document (future) locking rules for icount
  icount: prepare the code for future races in calling qemu_clock_warp
  icount: reorganize icount_warp_rt
  icount: use cpu_get_icount() directly
  timer: add timer_mod_anticipate and timer_mod_anticipate_ns
  timer: extract timer_mod_ns_locked and timerlist_rearm
  timer: make qemu_clock_enable sync between disable and timer's cb
  qemu-thread: add QemuEvent
  timer: protect timers_state's clock with seqlock
  seqlock: introduce read-write seqlock
  vga: Mark relevant portio lists regions as coalesced MMIO flushing
  cirrus: Mark vga io region as coalesced MMIO flushing
  portio: Allow to mark portio lists as coalesced MMIO flushing
  compatfd: switch to QemuThread
  memory: fix 128 arithmetic in info mtree

Message-id: 1382024935-28297-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
commit 989644915c
Anthony Liguori <aliguori@amazon.com>, 2013-10-18 10:01:49 -07:00
17 changed files with 507 additions and 148 deletions

cpus.c

@@ -37,6 +37,7 @@
 #include "sysemu/qtest.h"
 #include "qemu/main-loop.h"
 #include "qemu/bitmap.h"
+#include "qemu/seqlock.h"
 
 #ifndef _WIN32
 #include "qemu/compatfd.h"
@@ -97,21 +98,32 @@ static bool all_cpu_threads_idle(void)
 /***********************************************************/
 /* guest cycle counter */
 
+/* Protected by TimersState seqlock */
+
+/* Compensate for varying guest execution speed.  */
+static int64_t qemu_icount_bias;
+static int64_t vm_clock_warp_start;
+
 /* Conversion factor from emulated instructions to virtual clock ticks.  */
 static int icount_time_shift;
 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
 #define MAX_ICOUNT_SHIFT 10
-/* Compensate for varying guest execution speed.  */
-static int64_t qemu_icount_bias;
+
+/* Only written by TCG thread */
+static int64_t qemu_icount;
+
 static QEMUTimer *icount_rt_timer;
 static QEMUTimer *icount_vm_timer;
 static QEMUTimer *icount_warp_timer;
-static int64_t vm_clock_warp_start;
-static int64_t qemu_icount;
 
 typedef struct TimersState {
+    /* Protected by BQL.  */
     int64_t cpu_ticks_prev;
     int64_t cpu_ticks_offset;
+
+    /* cpu_clock_offset can be read out of BQL, so protect it with
+     * this lock.
+     */
+    QemuSeqLock vm_clock_seqlock;
     int64_t cpu_clock_offset;
     int32_t cpu_ticks_enabled;
     int64_t dummy;
@@ -120,7 +132,7 @@ typedef struct TimersState {
 static TimersState timers_state;
 
 /* Return the virtual CPU time, based on the instruction counter.  */
-int64_t cpu_get_icount(void)
+static int64_t cpu_get_icount_locked(void)
 {
     int64_t icount;
     CPUState *cpu = current_cpu;
@@ -136,7 +148,21 @@ int64_t cpu_get_icount(void)
     return qemu_icount_bias + (icount << icount_time_shift);
 }
 
+int64_t cpu_get_icount(void)
+{
+    int64_t icount;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        icount = cpu_get_icount_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return icount;
+}
+
 /* return the host CPU cycle counter and handle stop/restart */
+/* Caller must hold the BQL */
 int64_t cpu_get_ticks(void)
 {
     if (use_icount) {
@@ -157,37 +183,63 @@ int64_t cpu_get_ticks(void)
     }
 }
 
-/* return the host CPU monotonic timer and handle stop/restart */
-int64_t cpu_get_clock(void)
+static int64_t cpu_get_clock_locked(void)
 {
     int64_t ti;
-    if (!timers_state.cpu_ticks_enabled) {
-        return timers_state.cpu_clock_offset;
+
+    if (!timers_state.cpu_ticks_enabled) {
+        ti = timers_state.cpu_clock_offset;
     } else {
         ti = get_clock();
-        return ti + timers_state.cpu_clock_offset;
+        ti += timers_state.cpu_clock_offset;
     }
+
+    return ti;
+}
+
+/* return the host CPU monotonic timer and handle stop/restart */
+int64_t cpu_get_clock(void)
+{
+    int64_t ti;
+    unsigned start;
+
+    do {
+        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+        ti = cpu_get_clock_locked();
+    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+    return ti;
 }
 
-/* enable cpu_get_ticks() */
+/* enable cpu_get_ticks()
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
 void cpu_enable_ticks(void)
 {
+    /* Here the thing really protected by seqlock is cpu_clock_offset.  */
+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (!timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
         timers_state.cpu_clock_offset -= get_clock();
         timers_state.cpu_ticks_enabled = 1;
     }
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 }
 
 /* disable cpu_get_ticks() : the clock is stopped.  You must not call
-   cpu_get_ticks() after that.  */
+ * cpu_get_ticks() after that.
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
+ */
 void cpu_disable_ticks(void)
 {
+    /* Here the thing really protected by seqlock is cpu_clock_offset.  */
+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (timers_state.cpu_ticks_enabled) {
         timers_state.cpu_ticks_offset = cpu_get_ticks();
-        timers_state.cpu_clock_offset = cpu_get_clock();
+        timers_state.cpu_clock_offset = cpu_get_clock_locked();
         timers_state.cpu_ticks_enabled = 0;
     }
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 }
 
 /* Correlation between real and virtual time is always going to be
@@ -201,13 +253,19 @@ static void icount_adjust(void)
     int64_t cur_time;
     int64_t cur_icount;
     int64_t delta;
+
+    /* Protected by TimersState mutex.  */
     static int64_t last_delta;
+
     /* If the VM is not running, then do nothing.  */
     if (!runstate_is_running()) {
         return;
     }
-    cur_time = cpu_get_clock();
-    cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
+    cur_time = cpu_get_clock_locked();
+    cur_icount = cpu_get_icount_locked();
+
     delta = cur_icount - cur_time;
     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
     if (delta > 0
@@ -224,6 +282,7 @@ static void icount_adjust(void)
     }
     last_delta = delta;
     qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 }
 
 static void icount_adjust_rt(void *opaque)
@@ -248,30 +307,37 @@ static int64_t qemu_icount_round(int64_t count)
 
 static void icount_warp_rt(void *opaque)
 {
-    if (vm_clock_warp_start == -1) {
+    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
+     * changes from -1 to another value, so the race here is okay.
+     */
+    if (atomic_read(&vm_clock_warp_start) == -1) {
         return;
     }
 
+    seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (runstate_is_running()) {
         int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-        int64_t warp_delta = clock - vm_clock_warp_start;
-        if (use_icount == 1) {
-            qemu_icount_bias += warp_delta;
-        } else {
+        int64_t warp_delta;
+
+        warp_delta = clock - vm_clock_warp_start;
+        if (use_icount == 2) {
             /*
              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
              * far ahead of real time.
              */
-            int64_t cur_time = cpu_get_clock();
-            int64_t cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+            int64_t cur_time = cpu_get_clock_locked();
+            int64_t cur_icount = cpu_get_icount_locked();
             int64_t delta = cur_time - cur_icount;
-            qemu_icount_bias += MIN(warp_delta, delta);
-        }
-        if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
-            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+            warp_delta = MIN(warp_delta, delta);
         }
+        qemu_icount_bias += warp_delta;
     }
     vm_clock_warp_start = -1;
+    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+
+    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
+        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+    }
 }
 
 void qtest_clock_warp(int64_t dest)
@@ -281,7 +347,10 @@ void qtest_clock_warp(int64_t dest)
     while (clock < dest) {
         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
         int64_t warp = MIN(dest - clock, deadline);
+        seqlock_write_lock(&timers_state.vm_clock_seqlock);
         qemu_icount_bias += warp;
+        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+
         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
     }
@@ -290,6 +359,7 @@ void qtest_clock_warp(int64_t dest)
 
 void qemu_clock_warp(QEMUClockType type)
 {
+    int64_t clock;
     int64_t deadline;
 
     /*
@@ -309,8 +379,8 @@ void qemu_clock_warp(QEMUClockType type)
      * the earliest QEMU_CLOCK_VIRTUAL timer.
      */
     icount_warp_rt(NULL);
-    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(QEMU_CLOCK_VIRTUAL)) {
-        timer_del(icount_warp_timer);
+    timer_del(icount_warp_timer);
+    if (!all_cpu_threads_idle()) {
         return;
     }
 
@@ -319,17 +389,11 @@ void qemu_clock_warp(QEMUClockType type)
         return;
     }
 
-    vm_clock_warp_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     /* We want to use the earliest deadline from ALL vm_clocks */
+    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
-
-    /* Maintain prior (possibly buggy) behaviour where if no deadline
-     * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
-     * INT32_MAX nanoseconds ahead, we still use INT32_MAX
-     * nanoseconds.
-     */
-    if ((deadline < 0) || (deadline > INT32_MAX)) {
-        deadline = INT32_MAX;
+    if (deadline < 0) {
+        return;
     }
 
     if (deadline > 0) {
@@ -350,7 +414,12 @@ void qemu_clock_warp(QEMUClockType type)
          * you will not be sending network packets continuously instead of
          * every 100ms.
          */
-        timer_mod(icount_warp_timer, vm_clock_warp_start + deadline);
+        seqlock_write_lock(&timers_state.vm_clock_seqlock);
+        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
+            vm_clock_warp_start = clock;
+        }
+        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+        timer_mod_anticipate(icount_warp_timer, clock + deadline);
     } else if (deadline == 0) {
         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
     }
@@ -371,6 +440,7 @@ static const VMStateDescription vmstate_timers = {
 
 void configure_icount(const char *option)
 {
+    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
     if (!option) {
         return;
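The conversion at the heart of cpu_get_icount_locked() is qemu_icount_bias + (icount << icount_time_shift): executed instructions scaled into nanoseconds, plus a bias that warping adjusts. A quick arithmetic sanity check of that formula (illustration only, not part of the merge):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int64_t qemu_icount_bias = 0;   /* no warp applied yet */
        int icount_time_shift = 10;     /* MAX_ICOUNT_SHIFT: 1 insn = 2^10 ns */
        int64_t executed = 1000000;     /* one million emulated instructions */

        /* Same expression as cpu_get_icount_locked() uses. */
        int64_t virtual_ns = qemu_icount_bias + (executed << icount_time_shift);
        assert(virtual_ns == 1024000000);  /* ~1.024 s of virtual time, ~1 MIPS */
        return 0;
    }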

exec.c

@@ -129,7 +129,6 @@ static PhysPageMap next_map;
 
 static void io_mem_init(void);
 static void memory_map_init(void);
-static void *qemu_safe_ram_ptr(ram_addr_t addr);
 
 static MemoryRegion io_mem_watch;
 #endif
@@ -626,22 +625,39 @@ void cpu_abort(CPUArchState *env, const char *fmt, ...)
 }
 
 #if !defined(CONFIG_USER_ONLY)
+static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
+{
+    RAMBlock *block;
+
+    /* The list is protected by the iothread lock here.  */
+    block = ram_list.mru_block;
+    if (block && addr - block->offset < block->length) {
+        goto found;
+    }
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (addr - block->offset < block->length) {
+            goto found;
+        }
+    }
+
+    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
+    abort();
+
+found:
+    ram_list.mru_block = block;
+    return block;
+}
+
 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                       uintptr_t length)
 {
-    uintptr_t start1;
+    RAMBlock *block;
+    ram_addr_t start1;
 
-    /* we modify the TLB cache so that the dirty bit will be set again
-       when accessing the range */
-    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
-    /* Check that we don't span multiple blocks - this breaks the
-       address comparisons below.  */
-    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
-            != (end - 1) - start) {
-        abort();
-    }
+    block = qemu_get_ram_block(start);
+    assert(block == qemu_get_ram_block(end - 1));
+    start1 = (uintptr_t)block->host + (start - block->offset);
     cpu_tlb_reset_dirty_all(start1, length);
 }
 
 /* Note: start and end must be within the same ram block.  */
@@ -1269,29 +1285,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 }
 #endif /* !_WIN32 */
 
-static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
-{
-    RAMBlock *block;
-
-    /* The list is protected by the iothread lock here.  */
-    block = ram_list.mru_block;
-    if (block && addr - block->offset < block->length) {
-        goto found;
-    }
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (addr - block->offset < block->length) {
-            goto found;
-        }
-    }
-
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
-
-found:
-    ram_list.mru_block = block;
-    return block;
-}
-
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
    With the exception of the softmmu code in this file, this should
    only be used for local memory (e.g. video ram) that the device owns,
@@ -1319,40 +1312,6 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
     return block->host + (addr - block->offset);
 }
 
-/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
- * qemu_get_ram_ptr but do not touch ram_list.mru_block.
- *
- * ??? Is this still necessary?
- */
-static void *qemu_safe_ram_ptr(ram_addr_t addr)
-{
-    RAMBlock *block;
-
-    /* The list is protected by the iothread lock here.  */
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (addr - block->offset < block->length) {
-            if (xen_enabled()) {
-                /* We need to check if the requested address is in the RAM
-                 * because we don't want to map the entire memory in QEMU.
-                 * In that case just map until the end of the page.
-                 */
-                if (block->offset == 0) {
-                    return xen_map_cache(addr, 0, 0);
-                } else if (block->host == NULL) {
-                    block->host =
-                        xen_map_cache(block->offset, block->length, 1);
-                }
-            }
-            return block->host + (addr - block->offset);
-        }
-    }
-
-    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
-    abort();
-
-    return NULL;
-}
-
 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
  * but takes a size argument */
 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)

hw/display/cirrus_vga.c

@@ -2447,7 +2447,6 @@ static uint64_t cirrus_vga_ioport_read(void *opaque, hwaddr addr,
     VGACommonState *s = &c->vga;
     int val, index;
 
-    qemu_flush_coalesced_mmio_buffer();
     addr += 0x3b0;
 
     if (vga_ioport_invalid(s, addr)) {
@@ -2544,7 +2543,6 @@ static void cirrus_vga_ioport_write(void *opaque, hwaddr addr, uint64_t val,
     VGACommonState *s = &c->vga;
     int index;
 
-    qemu_flush_coalesced_mmio_buffer();
     addr += 0x3b0;
 
     /* check port range access depending on color/monochrome mode */
@@ -2843,6 +2841,7 @@ static void cirrus_init_common(CirrusVGAState *s, Object *owner,
     /* Register ioport 0x3b0 - 0x3df */
     memory_region_init_io(&s->cirrus_vga_io, owner, &cirrus_vga_io_ops, s,
                           "cirrus-io", 0x30);
+    memory_region_set_flush_coalesced(&s->cirrus_vga_io);
     memory_region_add_subregion(system_io, 0x3b0, &s->cirrus_vga_io);
 
     memory_region_init(&s->low_mem_container, owner,

hw/display/qxl.c

@@ -2073,6 +2073,7 @@ static int qxl_init_primary(PCIDevice *dev)
                      pci_address_space(dev), pci_address_space_io(dev), false);
     portio_list_init(qxl_vga_port_list, OBJECT(dev), qxl_vga_portio_list,
                      vga, "vga");
+    portio_list_set_flush_coalesced(qxl_vga_port_list);
     portio_list_add(qxl_vga_port_list, pci_address_space_io(dev), 0x3b0);
 
     vga->con = graphic_console_init(DEVICE(dev), &qxl_ops, qxl);

hw/display/vga.c

@@ -359,8 +359,6 @@ uint32_t vga_ioport_read(void *opaque, uint32_t addr)
     VGACommonState *s = opaque;
     int val, index;
 
-    qemu_flush_coalesced_mmio_buffer();
-
     if (vga_ioport_invalid(s, addr)) {
         val = 0xff;
     } else {
@@ -453,8 +451,6 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     VGACommonState *s = opaque;
     int index;
 
-    qemu_flush_coalesced_mmio_buffer();
-
     /* check port range access depending on color/monochrome mode */
     if (vga_ioport_invalid(s, addr)) {
         return;
@@ -2373,6 +2369,7 @@ void vga_init(VGACommonState *s, Object *obj, MemoryRegion *address_space,
     memory_region_set_coalescing(vga_io_memory);
     if (init_vga_ports) {
         portio_list_init(vga_port_list, obj, vga_ports, s, "vga");
+        portio_list_set_flush_coalesced(vga_port_list);
         portio_list_add(vga_port_list, address_space_io, 0x3b0);
     }
     if (vbe_ports) {

include/exec/ioport.h

@@ -64,11 +64,13 @@ typedef struct PortioList {
     struct MemoryRegion **regions;
     void *opaque;
     const char *name;
+    bool flush_coalesced_mmio;
 } PortioList;
 
 void portio_list_init(PortioList *piolist, Object *owner,
                       const struct MemoryRegionPortio *callbacks,
                       void *opaque, const char *name);
+void portio_list_set_flush_coalesced(PortioList *piolist);
 void portio_list_destroy(PortioList *piolist);
 void portio_list_add(PortioList *piolist,
                      struct MemoryRegion *address_space,

include/qemu/seqlock.h (new file)

@@ -0,0 +1,72 @@
+/*
+ * Seqlock implementation for QEMU
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Author:
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SEQLOCK_H
+#define QEMU_SEQLOCK_H 1
+
+#include <qemu/atomic.h>
+#include <qemu/thread.h>
+
+typedef struct QemuSeqLock QemuSeqLock;
+
+struct QemuSeqLock {
+    QemuMutex *mutex;
+    unsigned sequence;
+};
+
+static inline void seqlock_init(QemuSeqLock *sl, QemuMutex *mutex)
+{
+    sl->mutex = mutex;
+    sl->sequence = 0;
+}
+
+/* Lock out other writers and update the count.  */
+static inline void seqlock_write_lock(QemuSeqLock *sl)
+{
+    if (sl->mutex) {
+        qemu_mutex_lock(sl->mutex);
+    }
+    ++sl->sequence;
+
+    /* Write sequence before updating other fields.  */
+    smp_wmb();
+}
+
+static inline void seqlock_write_unlock(QemuSeqLock *sl)
+{
+    /* Write other fields before finalizing sequence.  */
+    smp_wmb();
+
+    ++sl->sequence;
+    if (sl->mutex) {
+        qemu_mutex_unlock(sl->mutex);
+    }
+}
+
+static inline unsigned seqlock_read_begin(QemuSeqLock *sl)
+{
+    /* Always fail if a write is in progress.  */
+    unsigned ret = sl->sequence & ~1;
+
+    /* Read sequence before reading other fields.  */
+    smp_rmb();
+    return ret;
+}
+
+static inline int seqlock_read_retry(const QemuSeqLock *sl, unsigned start)
+{
+    /* Read other fields before reading final sequence.  */
+    smp_rmb();
+    return unlikely(sl->sequence != start);
+}
+
+#endif
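The usage pattern that cpus.c adopts above follows directly from this header: writers are serialized externally (by the BQL, which is why seqlock_init() is passed a NULL mutex there) and bump the sequence around each update, while readers never block and simply retry if a writer was active in between. A minimal sketch of the pattern, with made-up variable names:

    #include "qemu/seqlock.h"

    static QemuSeqLock lock;            /* call seqlock_init(&lock, NULL) once;
                                         * writers serialized by an outer lock */
    static int64_t shared_a, shared_b;  /* hypothetical protected state */

    /* Writer: already serialized externally, so the seqlock only
     * orders readers against this update. */
    static void writer_update(int64_t a, int64_t b)
    {
        seqlock_write_lock(&lock);
        shared_a = a;
        shared_b = b;
        seqlock_write_unlock(&lock);
    }

    /* Reader: lock-free; loops whenever it observed a torn write. */
    static int64_t reader_sum(void)
    {
        unsigned start;
        int64_t sum;

        do {
            start = seqlock_read_begin(&lock);
            sum = shared_a + shared_b;
        } while (seqlock_read_retry(&lock, start));
        return sum;
    }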

include/qemu/thread-posix.h

@@ -21,6 +21,14 @@ struct QemuSemaphore {
 #endif
 };
 
+struct QemuEvent {
+#ifndef __linux__
+    pthread_mutex_t lock;
+    pthread_cond_t cond;
+#endif
+    unsigned value;
+};
+
 struct QemuThread {
     pthread_t thread;
 };

include/qemu/thread-win32.h

@@ -17,6 +17,10 @@ struct QemuSemaphore {
     HANDLE sema;
 };
 
+struct QemuEvent {
+    HANDLE event;
+};
+
 typedef struct QemuThreadData QemuThreadData;
 struct QemuThread {
     QemuThreadData *data;

include/qemu/thread.h

@@ -7,6 +7,7 @@
 typedef struct QemuMutex QemuMutex;
 typedef struct QemuCond QemuCond;
 typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuEvent QemuEvent;
 typedef struct QemuThread QemuThread;
 
 #ifdef _WIN32
@@ -45,6 +46,12 @@ void qemu_sem_wait(QemuSemaphore *sem);
 int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
 void qemu_sem_destroy(QemuSemaphore *sem);
 
+void qemu_event_init(QemuEvent *ev, bool init);
+void qemu_event_set(QemuEvent *ev);
+void qemu_event_reset(QemuEvent *ev);
+void qemu_event_wait(QemuEvent *ev);
+void qemu_event_destroy(QemuEvent *ev);
+
 void qemu_thread_create(QemuThread *thread,
                         void *(*start_routine)(void *),
                         void *arg, int mode);
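QemuEvent is a binary, manual-reset event: qemu_event_set() leaves the event set, waking current and future waiters, until someone calls qemu_event_reset(). A minimal sketch of the API using only the declarations above (hypothetical standalone example):

    #include "qemu/thread.h"

    static QemuEvent done_ev;   /* signals "worker has finished" */

    static void *worker(void *arg)
    {
        /* ... do the work ... */
        qemu_event_set(&done_ev);   /* wakes the waiter below */
        return NULL;
    }

    static void run_and_wait(void)
    {
        QemuThread t;

        qemu_event_init(&done_ev, false);   /* start out unset */
        qemu_thread_create(&t, worker, NULL, QEMU_THREAD_JOINABLE);
        qemu_event_wait(&done_ev);          /* blocks until the worker sets it */
        qemu_thread_join(&t);
        qemu_event_destroy(&done_ev);
    }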

include/qemu/timer.h

@@ -189,6 +189,12 @@ void qemu_clock_notify(QEMUClockType type);
  * @enabled: true to enable, false to disable
  *
  * Enable or disable a clock
+ * Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this function should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
  */
 void qemu_clock_enable(QEMUClockType type, bool enabled);
 
@@ -538,6 +544,19 @@ void timer_del(QEMUTimer *ts);
  */
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
 
+/**
+ * timer_mod_anticipate_ns:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time,
+ * whichever comes earlier.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time);
+
 /**
  * timer_mod:
  * @ts: the timer
@@ -551,6 +570,19 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
  */
 void timer_mod(QEMUTimer *ts, int64_t expire_timer);
 
+/**
+ * timer_mod_anticipate:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time, whichever
+ * comes earlier, taking into account the scale associated with the timer.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time);
+
 /**
  * timer_pending:
  * @ts: the timer
@@ -653,7 +685,9 @@ static inline int64_t qemu_soonest_timeout(int64_t timeout1, int64_t timeout2)
 void init_clocks(void);
 
 int64_t cpu_get_ticks(void);
+/* Caller must hold BQL */
 void cpu_enable_ticks(void);
+/* Caller must hold BQL */
 void cpu_disable_ticks(void);
 
 static inline int64_t get_ticks_per_sec(void)
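The difference from timer_mod() is direction: timer_mod_anticipate() only ever moves the deadline earlier, so concurrent callers cannot accidentally push a wakeup back, which is exactly what the qemu_clock_warp() change in cpus.c relies on. A hypothetical sketch:

    #include "qemu/timer.h"

    /* Several contexts may request a wakeup; the timer must end up at the
     * earliest requested time.  With plain timer_mod() the last caller
     * would win even if its deadline were later. */
    static void request_wakeup(QEMUTimer *wakeup_timer, int64_t deadline)
    {
        /* Reschedules only if 'deadline' is earlier than the current
         * expiry (or the timer is not pending); otherwise it is a no-op. */
        timer_mod_anticipate(wakeup_timer, deadline);
    }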

ioport.c

@@ -139,6 +139,12 @@ void portio_list_init(PortioList *piolist,
     piolist->opaque = opaque;
     piolist->owner = owner;
     piolist->name = name;
+    piolist->flush_coalesced_mmio = false;
+}
+
+void portio_list_set_flush_coalesced(PortioList *piolist)
+{
+    piolist->flush_coalesced_mmio = true;
 }
 
 void portio_list_destroy(PortioList *piolist)
@@ -231,6 +237,9 @@ static void portio_list_add_1(PortioList *piolist,
      */
     memory_region_init_io(&mrpio->mr, piolist->owner, &portio_ops, mrpio,
                           piolist->name, off_high - off_low);
+    if (piolist->flush_coalesced_mmio) {
+        memory_region_set_flush_coalesced(&mrpio->mr);
+    }
     memory_region_add_subregion(piolist->address_space,
                                 start + off_low, &mrpio->mr);
     piolist->regions[piolist->nr] = &mrpio->mr;

memory.c

@@ -1809,7 +1809,9 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
                    mr->alias->name,
                    mr->alias_offset,
                    mr->alias_offset
-                   + (hwaddr)int128_get64(mr->size) - 1);
+                   + (int128_nz(mr->size) ?
+                      (hwaddr)int128_get64(int128_sub(mr->size,
+                                                      int128_one())) : 0));
     } else {
         mon_printf(f,
                    TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s\n",
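The old expression narrowed mr->size to 64 bits before subtracting, which breaks for a region spanning the whole 64-bit address space: its size is 2^64, representable only as an Int128. Subtracting one while still in 128-bit arithmetic yields UINT64_MAX, which narrows safely; the int128_nz() test additionally keeps a zero-sized alias from underflowing. In miniature (illustration, not part of the patch):

    #include "qemu/int128.h"

    /* Returns the offset of the region's last byte.  Assumes a non-empty
     * region, as guarded by int128_nz() above. */
    static uint64_t region_last_byte(Int128 size)
    {
        /* int128_get64(size) would assert for size == 2^64;
         * size - 1 == UINT64_MAX still fits in 64 bits. */
        return int128_get64(int128_sub(size, int128_one()));
    }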

qemu-timer.c

@@ -45,6 +45,7 @@
 /* timers */
 
 typedef struct QEMUClock {
+    /* We rely on BQL to protect the timerlists */
    QLIST_HEAD(, QEMUTimerList) timerlists;
 
     NotifierList reset_notifiers;
@@ -71,6 +72,9 @@ struct QEMUTimerList {
     QLIST_ENTRY(QEMUTimerList) list;
     QEMUTimerListNotifyCB *notify_cb;
     void *notify_opaque;
+
+    /* lightweight method to mark the end of timerlist's running */
+    QemuEvent timers_done_ev;
 };
 
 /**
@@ -99,6 +103,7 @@ QEMUTimerList *timerlist_new(QEMUClockType type,
     QEMUClock *clock = qemu_clock_ptr(type);
 
     timer_list = g_malloc0(sizeof(QEMUTimerList));
+    qemu_event_init(&timer_list->timers_done_ev, false);
     timer_list->clock = clock;
     timer_list->notify_cb = cb;
     timer_list->notify_opaque = opaque;
@@ -143,13 +148,25 @@ void qemu_clock_notify(QEMUClockType type)
     }
 }
 
+/* Disabling the clock will wait for related timerlists to stop
+ * executing qemu_run_timers.  Thus, this function should not
+ * be used from the callback of a timer that is based on @clock.
+ * Doing so would cause a deadlock.
+ *
+ * Caller should hold BQL.
+ */
 void qemu_clock_enable(QEMUClockType type, bool enabled)
 {
     QEMUClock *clock = qemu_clock_ptr(type);
+    QEMUTimerList *tl;
     bool old = clock->enabled;
     clock->enabled = enabled;
     if (enabled && !old) {
         qemu_clock_notify(type);
+    } else if (!enabled && old) {
+        QLIST_FOREACH(tl, &clock->timerlists, list) {
+            qemu_event_wait(&tl->timers_done_ev);
+        }
     }
 }
 
@@ -338,6 +355,34 @@ static void timer_del_locked(QEMUTimerList *timer_list, QEMUTimer *ts)
     }
 }
 
+static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
+                                QEMUTimer *ts, int64_t expire_time)
+{
+    QEMUTimer **pt, *t;
+
+    /* add the timer in the sorted list */
+    pt = &timer_list->active_timers;
+    for (;;) {
+        t = *pt;
+        if (!timer_expired_ns(t, expire_time)) {
+            break;
+        }
+        pt = &t->next;
+    }
+    ts->expire_time = MAX(expire_time, 0);
+    ts->next = *pt;
+    *pt = ts;
+
+    return pt == &timer_list->active_timers;
+}
+
+static void timerlist_rearm(QEMUTimerList *timer_list)
+{
+    /* Interrupt execution to force deadline recalculation.  */
+    qemu_clock_warp(timer_list->clock->type);
+    timerlist_notify(timer_list);
+}
+
 /* stop a timer, but do not dealloc it */
 void timer_del(QEMUTimer *ts)
 {
@@ -353,30 +398,39 @@ void timer_del(QEMUTimer *ts)
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 {
     QEMUTimerList *timer_list = ts->timer_list;
-    QEMUTimer **pt, *t;
+    bool rearm;
 
     qemu_mutex_lock(&timer_list->active_timers_lock);
     timer_del_locked(timer_list, ts);
-
-    /* add the timer in the sorted list */
-    pt = &timer_list->active_timers;
-    for(;;) {
-        t = *pt;
-        if (!timer_expired_ns(t, expire_time)) {
-            break;
-        }
-        pt = &t->next;
-    }
-    ts->expire_time = MAX(expire_time, 0);
-    ts->next = *pt;
-    *pt = ts;
+    rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
     qemu_mutex_unlock(&timer_list->active_timers_lock);
 
-    /* Rearm if necessary  */
-    if (pt == &timer_list->active_timers) {
-        /* Interrupt execution to force deadline recalculation.  */
-        qemu_clock_warp(timer_list->clock->type);
-        timerlist_notify(timer_list);
+    if (rearm) {
+        timerlist_rearm(timer_list);
+    }
+}
+
+/* modify the current timer so that it will be fired when current_time
+   >= expire_time or the current deadline, whichever comes earlier.
+   The corresponding callback will be called. */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
+{
+    QEMUTimerList *timer_list = ts->timer_list;
+    bool rearm;
+
+    qemu_mutex_lock(&timer_list->active_timers_lock);
+    if (ts->expire_time == -1 || ts->expire_time > expire_time) {
+        if (ts->expire_time != -1) {
+            timer_del_locked(timer_list, ts);
+        }
+        rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+    } else {
+        rearm = false;
+    }
+    qemu_mutex_unlock(&timer_list->active_timers_lock);
+
+    if (rearm) {
+        timerlist_rearm(timer_list);
     }
 }
 
@@ -385,6 +439,11 @@ void timer_mod(QEMUTimer *ts, int64_t expire_time)
     timer_mod_ns(ts, expire_time * ts->scale);
 }
 
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
+{
+    timer_mod_anticipate_ns(ts, expire_time * ts->scale);
+}
+
 bool timer_pending(QEMUTimer *ts)
 {
     return ts->expire_time >= 0;
@@ -403,8 +462,9 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
     QEMUTimerCB *cb;
     void *opaque;
 
+    qemu_event_reset(&timer_list->timers_done_ev);
     if (!timer_list->clock->enabled) {
-        return progress;
+        goto out;
     }
 
     current_time = qemu_clock_get_ns(timer_list->clock->type);
@@ -428,6 +488,9 @@ bool timerlist_run_timers(QEMUTimerList *timer_list)
         cb(opaque);
         progress = true;
     }
+
+out:
+    qemu_event_set(&timer_list->timers_done_ev);
     return progress;
 }
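timers_done_ev implements a small run-to-completion guard: the event is set whenever the list is idle and stays reset for the whole of timerlist_run_timers(), so the qemu_clock_enable(type, false) path above can wait until every list has finished its current pass before returning. The same pattern in miniature (hypothetical names):

    #include "qemu/thread.h"

    static QemuEvent idle_ev;   /* set <=> no pass in progress; created with
                                 * qemu_event_init(&idle_ev, true) so waiters
                                 * do not block before the first pass */

    static void run_pass(void)
    {
        qemu_event_reset(&idle_ev);   /* entering the guarded section */
        /* ... run the callbacks ... */
        qemu_event_set(&idle_ev);     /* idle again; wakes any waiter */
    }

    static void wait_until_idle(void)
    {
        /* Returns at once if no pass is running, otherwise blocks until
         * run_pass() calls qemu_event_set(). */
        qemu_event_wait(&idle_ev);
    }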

util/compatfd.c

@@ -15,9 +15,9 @@
 
 #include "qemu-common.h"
 #include "qemu/compatfd.h"
+#include "qemu/thread.h"
 
 #include <sys/syscall.h>
-#include <pthread.h>
 
 struct sigfd_compat_info
 {
@@ -28,10 +28,6 @@ struct sigfd_compat_info
 static void *sigwait_compat(void *opaque)
 {
     struct sigfd_compat_info *info = opaque;
-    sigset_t all;
-
-    sigfillset(&all);
-    pthread_sigmask(SIG_BLOCK, &all, NULL);
 
     while (1) {
         int sig;
@@ -71,9 +67,8 @@ static void *sigwait_compat(void *opaque)
 
 static int qemu_signalfd_compat(const sigset_t *mask)
 {
-    pthread_attr_t attr;
-    pthread_t tid;
     struct sigfd_compat_info *info;
+    QemuThread thread;
     int fds[2];
 
     info = malloc(sizeof(*info));
@@ -93,12 +88,7 @@ static int qemu_signalfd_compat(const sigset_t *mask)
     memcpy(&info->mask, mask, sizeof(*mask));
     info->fd = fds[1];
 
-    pthread_attr_init(&attr);
-    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
-    pthread_create(&tid, &attr, sigwait_compat, info);
-    pthread_attr_destroy(&attr);
+    qemu_thread_create(&thread, sigwait_compat, info, QEMU_THREAD_DETACHED);
 
     return fds[0];
 }

util/qemu-thread-posix.c

@@ -20,7 +20,12 @@
 #include <limits.h>
 #include <unistd.h>
 #include <sys/time.h>
+#ifdef __linux__
+#include <sys/syscall.h>
+#include <linux/futex.h>
+#endif
 #include "qemu/thread.h"
+#include "qemu/atomic.h"
 
 static void error_exit(int err, const char *msg)
 {
@@ -272,6 +277,117 @@ void qemu_sem_wait(QemuSemaphore *sem)
 #endif
 }
 
+#ifdef __linux__
+#define futex(...)              syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+    futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+    futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0);
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+    if (n == 1) {
+        pthread_cond_signal(&ev->cond);
+    } else {
+        pthread_cond_broadcast(&ev->cond);
+    }
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+    pthread_mutex_lock(&ev->lock);
+    if (ev->value == val) {
+        pthread_cond_wait(&ev->cond, &ev->lock);
+    }
+    pthread_mutex_unlock(&ev->lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET         0
+#define EV_FREE        1
+#define EV_BUSY       -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+    pthread_mutex_init(&ev->lock, NULL);
+    pthread_cond_init(&ev->cond, NULL);
+#endif
+
+    ev->value = (init ? EV_SET : EV_FREE);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+#ifndef __linux__
+    pthread_mutex_destroy(&ev->lock);
+    pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) != EV_SET) {
+        if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+            /* There were waiters, wake them up.  */
+            futex_wake(ev, INT_MAX);
+        }
+    }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) == EV_SET) {
+        /*
+         * If there was a concurrent reset (or even reset+wait),
+         * do nothing.  Otherwise change EV_SET->EV_FREE.
+         */
+        atomic_or(&ev->value, EV_FREE);
+    }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    unsigned value;
+
+    value = atomic_mb_read(&ev->value);
+    if (value != EV_SET) {
+        if (value == EV_FREE) {
+            /*
+             * Leave the event reset and tell qemu_event_set that there
+             * are waiters.  No need to retry, because there cannot be
+             * a concurrent busy->free transition.  After the CAS, the
+             * event will be either set or busy.
+             */
+            if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+                return;
+            }
+        }
+        futex_wait(ev, EV_BUSY);
+    }
+}
+
 void qemu_thread_create(QemuThread *thread,
                         void *(*start_routine)(void*),
                         void *arg, int mode)

util/qemu-thread-win32.c

@@ -227,6 +227,32 @@ void qemu_sem_wait(QemuSemaphore *sem)
     }
 }
 
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+    /* Manual reset.  */
+    ev->event = CreateEvent(NULL, TRUE, init, NULL);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+    CloseHandle(ev->event);
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    SetEvent(ev->event);
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    ResetEvent(ev->event);
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    WaitForSingleObject(ev->event, INFINITE);
+}
+
 struct QemuThreadData {
     /* Passed to win32_start_routine.  */
     void *(*start_routine)(void *);