This adds the core of the QEMU VFIO-based PCI device assignment driver.
To make use of this driver, enable CONFIG_VFIO, CONFIG_VFIO_IOMMU_TYPE1,
and CONFIG_VFIO_PCI in your host Linux kernel config.  Load the vfio-pci
module.  To assign device 0000:05:00.0 to a guest, do the following:
for dev in $(ls /sys/bus/pci/devices/0000:05:00.0/iommu_group/devices); do
    vendor=$(cat /sys/bus/pci/devices/$dev/vendor)
    device=$(cat /sys/bus/pci/devices/$dev/device)
    if [ -e /sys/bus/pci/devices/$dev/driver ]; then
        echo $dev > /sys/bus/pci/devices/$dev/driver/unbind
    fi
    echo $vendor $device > /sys/bus/pci/drivers/vfio-pci/new_id
done
See Documentation/vfio.txt in the Linux kernel tree for further
description of IOMMU groups and VFIO.
Then launch qemu including the option:
-device vfio-pci,host=0000:05:00.0
Legacy PCI interrupts (INTx) currently make use of a kludge where we
trap BAR accesses and assume the access is in response to an interrupt,
therefore de-asserting and unmasking the interrupt.  It's not quite as
targeted as using the EOI for this, but it's self-contained and seems
to work across all architectures.  The side-effect is a significant
performance slow-down for devices in INTx mode.  Some devices, like
graphics cards, don't really use their interrupt, so this can be turned
off with the x-intx=off option, which disables INTx altogether.  This
should be considered an experimental option until we refine this code.
Both MSI and MSI-X are supported and avoid these issues.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
		
	
			
		
			
				
	
	
		
			115 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			115 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 * vfio based device assignment support
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Alex Williamson <alex.williamson@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
 | 
						|
 | 
						|
#ifndef HW_VFIO_PCI_INT_H
 | 
						|
#define HW_VFIO_PCI_INT_H
 | 
						|
 | 
						|
#include "qemu-common.h"
 | 
						|
#include "qemu-queue.h"
 | 
						|
#include "pci.h"
 | 
						|
#include "event_notifier.h"
 | 
						|
 | 
						|
typedef struct VFIOBAR {
 | 
						|
    off_t fd_offset; /* offset of BAR within device fd */
 | 
						|
    int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
 | 
						|
    MemoryRegion mem; /* slow, read/write access */
 | 
						|
    MemoryRegion mmap_mem; /* direct mapped access */
 | 
						|
    void *mmap;
 | 
						|
    size_t size;
 | 
						|
    uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
 | 
						|
    uint8_t nr; /* cache the BAR number for debug */
 | 
						|
} VFIOBAR;
 | 
						|
 | 
						|
typedef struct VFIOINTx {
 | 
						|
    bool pending; /* interrupt pending */
 | 
						|
    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
 | 
						|
    uint8_t pin; /* which pin to pull for qemu_set_irq */
 | 
						|
    EventNotifier interrupt; /* eventfd triggered on interrupt */
 | 
						|
    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
 | 
						|
    PCIINTxRoute route; /* routing info for QEMU bypass */
 | 
						|
    bool disabled;
 | 
						|
    char *intx;
 | 
						|
} VFIOINTx;
 | 
						|
 | 
						|
struct VFIODevice;
 | 
						|
 | 
						|
typedef struct VFIOMSIVector {
 | 
						|
    EventNotifier interrupt; /* eventfd triggered on interrupt */
 | 
						|
    struct VFIODevice *vdev; /* back pointer to device */
 | 
						|
    int virq; /* KVM irqchip route for QEMU bypass */
 | 
						|
    bool use;
 | 
						|
} VFIOMSIVector;
 | 
						|
 | 
						|
/*
 * Interrupt types.
 * NOTE(review): presumably the values stored in VFIODevice.interrupt
 * ("Current interrupt type") - confirm against the users of that field.
 */
enum {
    VFIO_INT_NONE = 0,
    VFIO_INT_INTx = 1,
    VFIO_INT_MSI  = 2,
    VFIO_INT_MSIX = 3,
};
 | 
						|
 | 
						|
struct VFIOGroup;
 | 
						|
 | 
						|
typedef struct VFIOContainer {
 | 
						|
    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
 | 
						|
    struct {
 | 
						|
        /* enable abstraction to support various iommu backends */
 | 
						|
        union {
 | 
						|
            MemoryListener listener; /* Used by type1 iommu */
 | 
						|
        };
 | 
						|
        void (*release)(struct VFIOContainer *);
 | 
						|
    } iommu_data;
 | 
						|
    QLIST_HEAD(, VFIOGroup) group_list;
 | 
						|
    QLIST_ENTRY(VFIOContainer) next;
 | 
						|
} VFIOContainer;
 | 
						|
 | 
						|
/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
 | 
						|
typedef struct VFIOMSIXInfo {
 | 
						|
    uint8_t table_bar;
 | 
						|
    uint8_t pba_bar;
 | 
						|
    uint16_t entries;
 | 
						|
    uint32_t table_offset;
 | 
						|
    uint32_t pba_offset;
 | 
						|
    MemoryRegion mmap_mem;
 | 
						|
    void *mmap;
 | 
						|
} VFIOMSIXInfo;
 | 
						|
 | 
						|
typedef struct VFIODevice {
 | 
						|
    PCIDevice pdev;
 | 
						|
    int fd;
 | 
						|
    VFIOINTx intx;
 | 
						|
    unsigned int config_size;
 | 
						|
    off_t config_offset; /* Offset of config space region within device fd */
 | 
						|
    unsigned int rom_size;
 | 
						|
    off_t rom_offset; /* Offset of ROM region within device fd */
 | 
						|
    int msi_cap_size;
 | 
						|
    VFIOMSIVector *msi_vectors;
 | 
						|
    VFIOMSIXInfo *msix;
 | 
						|
    int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
 | 
						|
    int interrupt; /* Current interrupt type */
 | 
						|
    VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
 | 
						|
    PCIHostDeviceAddress host;
 | 
						|
    QLIST_ENTRY(VFIODevice) next;
 | 
						|
    struct VFIOGroup *group;
 | 
						|
    bool reset_works;
 | 
						|
} VFIODevice;
 | 
						|
 | 
						|
typedef struct VFIOGroup {
 | 
						|
    int fd;
 | 
						|
    int groupid;
 | 
						|
    VFIOContainer *container;
 | 
						|
    QLIST_HEAD(, VFIODevice) device_list;
 | 
						|
    QLIST_ENTRY(VFIOGroup) next;
 | 
						|
    QLIST_ENTRY(VFIOGroup) container_next;
 | 
						|
} VFIOGroup;
 | 
						|
 | 
						|
#endif /* HW_VFIO_PCI_INT_H */
 |