vfio: Convert to ram_block_discard_disable()
VFIO is (except devices without a physical IOMMU or some mediated devices)
incompatible with discarding of RAM. The kernel will pin basically all VM
memory. Let's convert to ram_block_discard_disable(), which can now fail,
in contrast to qemu_balloon_inhibit().

Leave "x-balloon-allowed" named as it is for now.

Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Tony Krowiak <akrowiak@linux.ibm.com>
Cc: Halil Pasic <pasic@linux.ibm.com>
Cc: Pierre Morel <pmorel@linux.ibm.com>
Cc: Eric Farman <farman@linux.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Message-Id: <20200626072248.78761-4-david@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
This commit is contained in:
parent
d24f31db3b
commit
aff92b8286
@ -105,12 +105,12 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
|
|||||||
vapdev->vdev.dev = dev;
|
vapdev->vdev.dev = dev;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* vfio-ap devices operate in a way compatible with
|
* vfio-ap devices operate in a way compatible with discarding of
|
||||||
* memory ballooning, as no pages are pinned in the host.
|
* memory in RAM blocks, as no pages are pinned in the host.
|
||||||
* This needs to be set before vfio_get_device() for vfio common to
|
* This needs to be set before vfio_get_device() for vfio common to
|
||||||
* handle the balloon inhibitor.
|
* handle ram_block_discard_disable().
|
||||||
*/
|
*/
|
||||||
vapdev->vdev.balloon_allowed = true;
|
vapdev->vdev.ram_block_discard_allowed = true;
|
||||||
|
|
||||||
ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, errp);
|
ret = vfio_get_device(vfio_group, mdevid, &vapdev->vdev, errp);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
@ -574,12 +574,13 @@ static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* All vfio-ccw devices are believed to operate in a way compatible with
|
* All vfio-ccw devices are believed to operate in a way compatible with
|
||||||
* memory ballooning, ie. pages pinned in the host are in the current
|
* discarding of memory in RAM blocks, ie. pages pinned in the host are
|
||||||
* working set of the guest driver and therefore never overlap with pages
|
* in the current working set of the guest driver and therefore never
|
||||||
* available to the guest balloon driver. This needs to be set before
|
* overlap e.g., with pages available to the guest balloon driver. This
|
||||||
* vfio_get_device() for vfio common to handle the balloon inhibitor.
|
* needs to be set before vfio_get_device() for vfio common to handle
|
||||||
|
* ram_block_discard_disable().
|
||||||
*/
|
*/
|
||||||
vcdev->vdev.balloon_allowed = true;
|
vcdev->vdev.ram_block_discard_allowed = true;
|
||||||
|
|
||||||
if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
|
if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
|
||||||
goto out_err;
|
goto out_err;
|
||||||
|
@ -33,7 +33,6 @@
|
|||||||
#include "qemu/error-report.h"
|
#include "qemu/error-report.h"
|
||||||
#include "qemu/main-loop.h"
|
#include "qemu/main-loop.h"
|
||||||
#include "qemu/range.h"
|
#include "qemu/range.h"
|
||||||
#include "sysemu/balloon.h"
|
|
||||||
#include "sysemu/kvm.h"
|
#include "sysemu/kvm.h"
|
||||||
#include "sysemu/reset.h"
|
#include "sysemu/reset.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
@ -1215,31 +1214,36 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
|
|||||||
space = vfio_get_address_space(as);
|
space = vfio_get_address_space(as);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* VFIO is currently incompatible with memory ballooning insofar as the
|
* VFIO is currently incompatible with discarding of RAM insofar as the
|
||||||
* madvise to purge (zap) the page from QEMU's address space does not
|
* madvise to purge (zap) the page from QEMU's address space does not
|
||||||
* interact with the memory API and therefore leaves stale virtual to
|
* interact with the memory API and therefore leaves stale virtual to
|
||||||
* physical mappings in the IOMMU if the page was previously pinned. We
|
* physical mappings in the IOMMU if the page was previously pinned. We
|
||||||
* therefore add a balloon inhibit for each group added to a container,
|
* therefore set discarding broken for each group added to a container,
|
||||||
* whether the container is used individually or shared. This provides
|
* whether the container is used individually or shared. This provides
|
||||||
* us with options to allow devices within a group to opt-in and allow
|
* us with options to allow devices within a group to opt-in and allow
|
||||||
* ballooning, so long as it is done consistently for a group (for instance
|
* discarding, so long as it is done consistently for a group (for instance
|
||||||
* if the device is an mdev device where it is known that the host vendor
|
* if the device is an mdev device where it is known that the host vendor
|
||||||
* driver will never pin pages outside of the working set of the guest
|
* driver will never pin pages outside of the working set of the guest
|
||||||
* driver, which would thus not be ballooning candidates).
|
* driver, which would thus not be discarding candidates).
|
||||||
*
|
*
|
||||||
* The first opportunity to induce pinning occurs here where we attempt to
|
* The first opportunity to induce pinning occurs here where we attempt to
|
||||||
* attach the group to existing containers within the AddressSpace. If any
|
* attach the group to existing containers within the AddressSpace. If any
|
||||||
* pages are already zapped from the virtual address space, such as from a
|
* pages are already zapped from the virtual address space, such as from
|
||||||
* previous ballooning opt-in, new pinning will cause valid mappings to be
|
* previous discards, new pinning will cause valid mappings to be
|
||||||
* re-established. Likewise, when the overall MemoryListener for a new
|
* re-established. Likewise, when the overall MemoryListener for a new
|
||||||
* container is registered, a replay of mappings within the AddressSpace
|
* container is registered, a replay of mappings within the AddressSpace
|
||||||
* will occur, re-establishing any previously zapped pages as well.
|
* will occur, re-establishing any previously zapped pages as well.
|
||||||
*
|
*
|
||||||
* NB. Balloon inhibiting does not currently block operation of the
|
* Especially virtio-balloon is currently only prevented from discarding
|
||||||
* balloon driver or revoke previously pinned pages, it only prevents
|
* new memory, it will not yet set ram_block_discard_set_required() and
|
||||||
* calling madvise to modify the virtual mapping of ballooned pages.
|
* therefore, neither stops us here or deals with the sudden memory
|
||||||
|
* consumption of inflated memory.
|
||||||
*/
|
*/
|
||||||
qemu_balloon_inhibit(true);
|
ret = ram_block_discard_disable(true);
|
||||||
|
if (ret) {
|
||||||
|
error_setg_errno(errp, -ret, "Cannot set discarding of RAM broken");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
QLIST_FOREACH(container, &space->containers, next) {
|
QLIST_FOREACH(container, &space->containers, next) {
|
||||||
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
|
||||||
@ -1405,7 +1409,7 @@ close_fd_exit:
|
|||||||
close(fd);
|
close(fd);
|
||||||
|
|
||||||
put_space_exit:
|
put_space_exit:
|
||||||
qemu_balloon_inhibit(false);
|
ram_block_discard_disable(false);
|
||||||
vfio_put_address_space(space);
|
vfio_put_address_space(space);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -1526,8 +1530,8 @@ void vfio_put_group(VFIOGroup *group)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!group->balloon_allowed) {
|
if (!group->ram_block_discard_allowed) {
|
||||||
qemu_balloon_inhibit(false);
|
ram_block_discard_disable(false);
|
||||||
}
|
}
|
||||||
vfio_kvm_device_del_group(group);
|
vfio_kvm_device_del_group(group);
|
||||||
vfio_disconnect_container(group);
|
vfio_disconnect_container(group);
|
||||||
@ -1565,22 +1569,23 @@ int vfio_get_device(VFIOGroup *group, const char *name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clear the balloon inhibitor for this group if the driver knows the
|
* Set discarding of RAM as not broken for this group if the driver knows
|
||||||
* device operates compatibly with ballooning. Setting must be consistent
|
* the device operates compatibly with discarding. Setting must be
|
||||||
* per group, but since compatibility is really only possible with mdev
|
* consistent per group, but since compatibility is really only possible
|
||||||
* currently, we expect singleton groups.
|
* with mdev currently, we expect singleton groups.
|
||||||
*/
|
*/
|
||||||
if (vbasedev->balloon_allowed != group->balloon_allowed) {
|
if (vbasedev->ram_block_discard_allowed !=
|
||||||
|
group->ram_block_discard_allowed) {
|
||||||
if (!QLIST_EMPTY(&group->device_list)) {
|
if (!QLIST_EMPTY(&group->device_list)) {
|
||||||
error_setg(errp,
|
error_setg(errp, "Inconsistent setting of support for discarding "
|
||||||
"Inconsistent device balloon setting within group");
|
"RAM (e.g., balloon) within group");
|
||||||
close(fd);
|
close(fd);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!group->balloon_allowed) {
|
if (!group->ram_block_discard_allowed) {
|
||||||
group->balloon_allowed = true;
|
group->ram_block_discard_allowed = true;
|
||||||
qemu_balloon_inhibit(false);
|
ram_block_discard_disable(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2789,7 +2789,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mediated devices *might* operate compatibly with memory ballooning, but
|
* Mediated devices *might* operate compatibly with discarding of RAM, but
|
||||||
* we cannot know for certain, it depends on whether the mdev vendor driver
|
* we cannot know for certain, it depends on whether the mdev vendor driver
|
||||||
* stays in sync with the active working set of the guest driver. Prevent
|
* stays in sync with the active working set of the guest driver. Prevent
|
||||||
* the x-balloon-allowed option unless this is minimally an mdev device.
|
* the x-balloon-allowed option unless this is minimally an mdev device.
|
||||||
@ -2802,7 +2802,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
|||||||
|
|
||||||
trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
|
trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
|
||||||
|
|
||||||
if (vdev->vbasedev.balloon_allowed && !is_mdev) {
|
if (vdev->vbasedev.ram_block_discard_allowed && !is_mdev) {
|
||||||
error_setg(errp, "x-balloon-allowed only potentially compatible "
|
error_setg(errp, "x-balloon-allowed only potentially compatible "
|
||||||
"with mdev devices");
|
"with mdev devices");
|
||||||
vfio_put_group(group);
|
vfio_put_group(group);
|
||||||
@ -3156,7 +3156,7 @@ static Property vfio_pci_dev_properties[] = {
|
|||||||
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
|
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
|
||||||
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
|
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
|
||||||
DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
|
DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
|
||||||
vbasedev.balloon_allowed, false),
|
vbasedev.ram_block_discard_allowed, false),
|
||||||
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
|
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
|
||||||
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
|
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
|
||||||
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
|
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
|
||||||
|
@ -108,7 +108,7 @@ typedef struct VFIODevice {
|
|||||||
bool reset_works;
|
bool reset_works;
|
||||||
bool needs_reset;
|
bool needs_reset;
|
||||||
bool no_mmap;
|
bool no_mmap;
|
||||||
bool balloon_allowed;
|
bool ram_block_discard_allowed;
|
||||||
VFIODeviceOps *ops;
|
VFIODeviceOps *ops;
|
||||||
unsigned int num_irqs;
|
unsigned int num_irqs;
|
||||||
unsigned int num_regions;
|
unsigned int num_regions;
|
||||||
@ -128,7 +128,7 @@ typedef struct VFIOGroup {
|
|||||||
QLIST_HEAD(, VFIODevice) device_list;
|
QLIST_HEAD(, VFIODevice) device_list;
|
||||||
QLIST_ENTRY(VFIOGroup) next;
|
QLIST_ENTRY(VFIOGroup) next;
|
||||||
QLIST_ENTRY(VFIOGroup) container_next;
|
QLIST_ENTRY(VFIOGroup) container_next;
|
||||||
bool balloon_allowed;
|
bool ram_block_discard_allowed;
|
||||||
} VFIOGroup;
|
} VFIOGroup;
|
||||||
|
|
||||||
typedef struct VFIODMABuf {
|
typedef struct VFIODMABuf {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user