 097310b53e
			
		
	
	
		097310b53e
		
	
	
	
	
		
			
			I'll use it with block backends shortly, and the name is going to fit badly there. It's a block layer thing anyway, not just a block driver thing. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
		
			
				
	
	
		
			1085 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1085 lines
		
	
	
		
			31 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * QEMU Block driver for Archipelago
 | |
|  *
 | |
|  * Copyright (C) 2014 Chrysostomos Nanakos <cnanakos@grnet.gr>
 | |
|  *
 | |
|  * This work is licensed under the terms of the GNU GPL, version 2 or later.
 | |
|  * See the COPYING file in the top-level directory.
 | |
|  *
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * VM Image on Archipelago volume is specified like this:
 | |
|  *
 | |
|  * file.driver=archipelago,file.volume=<volumename>
 | |
|  * [,file.mport=<mapperd_port>[,file.vport=<vlmcd_port>]
 | |
|  * [,file.segment=<segment_name>]]
 | |
|  *
 | |
|  * or
 | |
|  *
 | |
|  * file=archipelago:<volumename>[/mport=<mapperd_port>[:vport=<vlmcd_port>][:
 | |
|  * segment=<segment_name>]]
 | |
|  *
 | |
|  * 'archipelago' is the protocol.
 | |
|  *
 | |
|  * 'mport' is the port number on which mapperd is listening. This is optional;
 | |
|  * if not specified, QEMU makes Archipelago use the default port.
 | |
|  *
 | |
|  * 'vport' is the port number on which vlmcd is listening. This is optional;
 | |
|  * if not specified, QEMU makes Archipelago use the default port.
 | |
|  *
 | |
|  * 'segment' is the name of the shared memory segment the Archipelago stack
 | |
|  * is using. This is optional; if not specified, QEMU makes Archipelago
 | |
|  * use the default value, 'archipelago'.
 | |
|  *
 | |
|  * Examples:
 | |
|  *
 | |
|  * file.driver=archipelago,file.volume=my_vm_volume
 | |
|  * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123
 | |
|  * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
 | |
|  *  file.vport=1234
 | |
|  * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
 | |
|  *  file.vport=1234,file.segment=my_segment
 | |
|  *
 | |
|  * or
 | |
|  *
 | |
|  * file=archipelago:my_vm_volume
 | |
|  * file=archipelago:my_vm_volume/mport=123
 | |
|  * file=archipelago:my_vm_volume/mport=123:vport=1234
 | |
|  * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment
 | |
|  *
 | |
|  */
 | |
| 
 | |
| #include "qemu-common.h"
 | |
| #include "block/block_int.h"
 | |
| #include "qemu/error-report.h"
 | |
| #include "qemu/thread.h"
 | |
| #include "qapi/qmp/qint.h"
 | |
| #include "qapi/qmp/qstring.h"
 | |
| #include "qapi/qmp/qjson.h"
 | |
| #include "qemu/atomic.h"
 | |
| 
 | |
| #include <inttypes.h>
 | |
| #include <xseg/xseg.h>
 | |
| #include <xseg/protocol.h>
 | |
| 
 | |
| #define MAX_REQUEST_SIZE    524288 /* bytes per segment when archipelago_aio_segmented_rw() splits an I/O */
 | |
| 
 | |
| #define ARCHIPELAGO_OPT_VOLUME      "volume"  /* runtime option key: volume name */
 | |
| #define ARCHIPELAGO_OPT_SEGMENT     "segment" /* runtime option key: shared memory segment name */
 | |
| #define ARCHIPELAGO_OPT_MPORT       "mport"   /* runtime option key: mapperd port */
 | |
| #define ARCHIPELAGO_OPT_VPORT       "vport"   /* runtime option key: vlmcd port */
 | |
| #define ARCHIPELAGO_DFL_MPORT       1001      /* mport used when none is given */
 | |
| #define ARCHIPELAGO_DFL_VPORT       501       /* vport used when none is given */
 | |
| 
 | |
| #define archipelagolog(fmt, ...) \
 | |
|     do {                         \
 | |
|         fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \
 | |
|     } while (0) /* printf-style log to stderr, prefixed with "archipelago" and the calling function's name */
 | |
| 
 | |
| typedef enum { /* Kind of operation a request carries; stored in AIORequestData.op and ArchipelagoAIOCB.cmd */
 | |
|     ARCHIP_OP_READ,     /* submitted to xseg as X_READ */
 | |
|     ARCHIP_OP_WRITE,    /* submitted to xseg as X_WRITE */
 | |
|     ARCHIP_OP_FLUSH,    /* submitted to xseg as X_FLUSH */
 | |
|     ARCHIP_OP_VOLINFO,  /* on reply the handler thread signals archip_cond */
 | |
|     ARCHIP_OP_TRUNCATE, /* on reply the handler thread signals archip_cond */
 | |
| } ARCHIPCmd;
 | |
| 
 | |
| typedef struct ArchipelagoAIOCB { /* Per-request AIO control block of this driver */
 | |
|     BlockAIOCB common;              /* generic part; holds bs, cb and opaque */
 | |
|     QEMUBH *bh;                     /* bottom half created in archipelago_finish_aiocb() to run the completion */
 | |
|     struct BDRVArchipelagoState *s; /* driver state the request was issued on */
 | |
|     QEMUIOVector *qiov;             /* caller's I/O vector (source for writes, destination for reads) */
 | |
|     ARCHIPCmd cmd;                  /* operation requested */
 | |
|     int status;                     /* -EINPROGRESS when issued, 0 once the completion callback has run */
 | |
|     int64_t size;                   /* request length in bytes */
 | |
|     int64_t ret;                    /* serviced byte count, later collapsed to 0 or -EIO for the callback */
 | |
| } ArchipelagoAIOCB;
 | |
| 
 | |
| typedef struct BDRVArchipelagoState { /* Per-BlockDriverState private data (bs->opaque) */
 | |
|     ArchipelagoAIOCB *event_acb;  /* NOTE(review): not referenced in the code visible here */
 | |
|     char *volname;                /* g_strdup'ed volume name; freed on close */
 | |
|     char *segment_name;           /* g_strdup'ed segment name; freed on close */
 | |
|     uint64_t size;                /* NOTE(review): presumably the volume size; not set in the code visible here */
 | |
|     /* Archipelago specific */
 | |
|     struct xseg *xseg;            /* segment handle returned by xseg_join() */
 | |
|     struct xseg_port *port;       /* port returned by xseg_bind_dynport() */
 | |
|     xport srcport;                /* port->portno; source port for every request */
 | |
|     xport sport;                  /* only passed by value to init_local_signal() */
 | |
|     xport mportno;                /* mapperd port from the "mport" option */
 | |
|     xport vportno;                /* vlmcd port from the "vport" option; destination of I/O and close requests */
 | |
|     QemuMutex archip_mutex;       /* initialised with archip_cond; users lie outside the visible code */
 | |
|     QemuCond archip_cond;         /* signalled by the handler on VOLINFO/TRUNCATE replies */
 | |
|     bool is_signaled;             /* set true by the handler together with archip_cond */
 | |
|     /* Request handler specific */
 | |
|     QemuThread request_th;        /* thread running xseg_request_handler() */
 | |
|     QemuCond request_cond;        /* signalled by the handler thread when it leaves its loop */
 | |
|     QemuMutex request_mutex;      /* held by the handler thread for the whole lifetime of its loop */
 | |
|     bool th_is_signaled;          /* set true by the handler thread just before it exits */
 | |
|     bool stopping;                /* set by close to ask the handler thread to stop */
 | |
| } BDRVArchipelagoState;
 | |
| 
 | |
| typedef struct ArchipelagoSegmentedRequest { /* Bookkeeping shared by all segments of one split AIO request */
 | |
|     size_t count;  /* sum of req->serviced over the segments answered so far */
 | |
|     size_t total;  /* byte count of the whole request */
 | |
|     int ref;       /* segments still outstanding; updated with atomic helpers */
 | |
|     int failed;    /* set to 1 when submitting a segment failed */
 | |
| } ArchipelagoSegmentedRequest;
 | |
| 
 | |
| typedef struct AIORequestData { /* Per-segment context attached to an xseg request via xseg_set_req_data() */
 | |
|     const char *volname;                  /* borrowed pointer to the state's volume name */
 | |
|     off_t offset;                         /* byte offset of this segment in the volume */
 | |
|     size_t size;                          /* byte length of this segment */
 | |
|     uint64_t bufidx;                      /* byte offset of this segment inside the qiov */
 | |
|     int ret;                              /* NOTE(review): never written in the code visible here */
 | |
|     int op;                               /* ARCHIPCmd value of the request */
 | |
|     ArchipelagoAIOCB *aio_cb;             /* owning AIO control block */
 | |
|     ArchipelagoSegmentedRequest *segreq;  /* bookkeeping shared with sibling segments */
 | |
| } AIORequestData;
 | |
| 
 | |
| static void qemu_archipelago_complete_aio(void *opaque);
 | |
| 
 | |
| static void init_local_signal(struct xseg *xseg, xport sport, xport srcport)
 | |
| {
 | |
|     if (xseg && (sport != srcport)) {
 | |
|         xseg_init_local_signal(xseg, srcport);
 | |
|         sport = srcport;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void archipelago_finish_aiocb(AIORequestData *reqdata)
 | |
| {
 | |
|     if (reqdata->aio_cb->ret != reqdata->segreq->total) {
 | |
|         reqdata->aio_cb->ret = -EIO;
 | |
|     } else if (reqdata->aio_cb->ret == reqdata->segreq->total) {
 | |
|         reqdata->aio_cb->ret = 0;
 | |
|     }
 | |
|     reqdata->aio_cb->bh = aio_bh_new(
 | |
|                         bdrv_get_aio_context(reqdata->aio_cb->common.bs),
 | |
|                         qemu_archipelago_complete_aio, reqdata
 | |
|                         );
 | |
|     qemu_bh_schedule(reqdata->aio_cb->bh);
 | |
| }
 | |
| 
 | |
| static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port,
 | |
|                       struct xseg_request *expected_req)
 | |
| {
 | |
|     struct xseg_request *req;
 | |
|     xseg_prepare_wait(xseg, srcport);
 | |
|     void *psd = xseg_get_signal_desc(xseg, port);
 | |
|     while (1) {
 | |
|         req = xseg_receive(xseg, srcport, X_NONBLOCK);
 | |
|         if (req) {
 | |
|             if (req != expected_req) {
 | |
|                 archipelagolog("Unknown received request\n");
 | |
|                 xseg_put_request(xseg, req, srcport);
 | |
|             } else if (!(req->state & XS_SERVED)) {
 | |
|                 return -1;
 | |
|             } else {
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|         xseg_wait_signal(xseg, psd, 100000UL);
 | |
|     }
 | |
|     xseg_cancel_wait(xseg, srcport);
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static void xseg_request_handler(void *state)
 | |
| {
 | |
|     BDRVArchipelagoState *s = (BDRVArchipelagoState *) state;
 | |
|     void *psd = xseg_get_signal_desc(s->xseg, s->port);
 | |
|     qemu_mutex_lock(&s->request_mutex);
 | |
| 
 | |
|     while (!s->stopping) {
 | |
|         struct xseg_request *req;
 | |
|         void *data;
 | |
|         xseg_prepare_wait(s->xseg, s->srcport);
 | |
|         req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK);
 | |
|         if (req) {
 | |
|             AIORequestData *reqdata;
 | |
|             ArchipelagoSegmentedRequest *segreq;
 | |
|             xseg_get_req_data(s->xseg, req, (void **)&reqdata);
 | |
| 
 | |
|             switch (reqdata->op) {
 | |
|             case ARCHIP_OP_READ:
 | |
|                 data = xseg_get_data(s->xseg, req);
 | |
|                 segreq = reqdata->segreq;
 | |
|                 segreq->count += req->serviced;
 | |
| 
 | |
|                 qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx,
 | |
|                                     data,
 | |
|                                     req->serviced);
 | |
| 
 | |
|                 xseg_put_request(s->xseg, req, s->srcport);
 | |
| 
 | |
|                 if (atomic_fetch_dec(&segreq->ref) == 1) {
 | |
|                     if (!segreq->failed) {
 | |
|                         reqdata->aio_cb->ret = segreq->count;
 | |
|                         archipelago_finish_aiocb(reqdata);
 | |
|                         g_free(segreq);
 | |
|                     } else {
 | |
|                         g_free(segreq);
 | |
|                         g_free(reqdata);
 | |
|                     }
 | |
|                 } else {
 | |
|                     g_free(reqdata);
 | |
|                 }
 | |
|                 break;
 | |
|             case ARCHIP_OP_WRITE:
 | |
|             case ARCHIP_OP_FLUSH:
 | |
|                 segreq = reqdata->segreq;
 | |
|                 segreq->count += req->serviced;
 | |
|                 xseg_put_request(s->xseg, req, s->srcport);
 | |
| 
 | |
|                 if (atomic_fetch_dec(&segreq->ref) == 1) {
 | |
|                     if (!segreq->failed) {
 | |
|                         reqdata->aio_cb->ret = segreq->count;
 | |
|                         archipelago_finish_aiocb(reqdata);
 | |
|                         g_free(segreq);
 | |
|                     } else {
 | |
|                         g_free(segreq);
 | |
|                         g_free(reqdata);
 | |
|                     }
 | |
|                 } else {
 | |
|                     g_free(reqdata);
 | |
|                 }
 | |
|                 break;
 | |
|             case ARCHIP_OP_VOLINFO:
 | |
|             case ARCHIP_OP_TRUNCATE:
 | |
|                 s->is_signaled = true;
 | |
|                 qemu_cond_signal(&s->archip_cond);
 | |
|                 break;
 | |
|             }
 | |
|         } else {
 | |
|             xseg_wait_signal(s->xseg, psd, 100000UL);
 | |
|         }
 | |
|         xseg_cancel_wait(s->xseg, s->srcport);
 | |
|     }
 | |
| 
 | |
|     s->th_is_signaled = true;
 | |
|     qemu_cond_signal(&s->request_cond);
 | |
|     qemu_mutex_unlock(&s->request_mutex);
 | |
|     qemu_thread_exit(NULL);
 | |
| }
 | |
| 
 | |
| static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s)
 | |
| {
 | |
|     if (xseg_initialize()) {
 | |
|         archipelagolog("Cannot initialize XSEG\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     s->xseg = xseg_join("posix", s->segment_name,
 | |
|                         "posixfd", NULL);
 | |
|     if (!s->xseg) {
 | |
|         archipelagolog("Cannot join XSEG shared memory segment\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     s->port = xseg_bind_dynport(s->xseg);
 | |
|     s->srcport = s->port->portno;
 | |
|     init_local_signal(s->xseg, s->sport, s->srcport);
 | |
|     return 0;
 | |
| 
 | |
| err_exit:
 | |
|     return -1;
 | |
| }
 | |
| 
 | |
| static int qemu_archipelago_init(BDRVArchipelagoState *s)
 | |
| {
 | |
|     int ret;
 | |
| 
 | |
|     ret = qemu_archipelago_xseg_init(s);
 | |
|     if (ret < 0) {
 | |
|         error_report("Cannot initialize XSEG. Aborting...\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     qemu_cond_init(&s->archip_cond);
 | |
|     qemu_mutex_init(&s->archip_mutex);
 | |
|     qemu_cond_init(&s->request_cond);
 | |
|     qemu_mutex_init(&s->request_mutex);
 | |
|     s->th_is_signaled = false;
 | |
|     qemu_thread_create(&s->request_th, "xseg_io_th",
 | |
|                        (void *) xseg_request_handler,
 | |
|                        (void *) s, QEMU_THREAD_JOINABLE);
 | |
| 
 | |
| err_exit:
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static void qemu_archipelago_complete_aio(void *opaque)
 | |
| {
 | |
|     AIORequestData *reqdata = (AIORequestData *) opaque;
 | |
|     ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
 | |
| 
 | |
|     qemu_bh_delete(aio_cb->bh);
 | |
|     aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
 | |
|     aio_cb->status = 0;
 | |
| 
 | |
|     qemu_aio_unref(aio_cb);
 | |
|     g_free(reqdata);
 | |
| }
 | |
| 
 | |
| static void xseg_find_port(char *pstr, const char *needle, xport *aport)
 | |
| {
 | |
|     const char *a;
 | |
|     char *endptr = NULL;
 | |
|     unsigned long port;
 | |
|     if (strstart(pstr, needle, &a)) {
 | |
|         if (strlen(a) > 0) {
 | |
|             port = strtoul(a, &endptr, 10);
 | |
|             if (strlen(endptr)) {
 | |
|                 *aport = -2;
 | |
|                 return;
 | |
|             }
 | |
|             *aport = (xport) port;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
/*
 * Parse a "<needle><segment name>" token.
 *
 * @pstr:         token to examine, e.g. "segment=my_segment"
 * @needle:       prefix that must match, e.g. "segment="
 * @segment_name: receives a g_strdup'ed copy of the name; the caller frees it
 *
 * If @pstr does not start with @needle, or nothing follows the prefix,
 * *@segment_name is left untouched.
 *
 * *@segment_name must be NULL or a g_malloc'ed string on entry: a previous
 * value is freed before being replaced, so that a repeated "segment=" token
 * no longer leaks the earlier copy.
 */
static void xseg_find_segment(char *pstr, const char *needle,
                              char **segment_name)
{
    const char *value;

    if (!strstart(pstr, needle, &value) || *value == '\0') {
        return;
    }

    g_free(*segment_name);
    *segment_name = g_strdup(value);
}
 | |
| 
 | |
| static void parse_filename_opts(const char *filename, Error **errp,
 | |
|                                 char **volume, char **segment_name,
 | |
|                                 xport *mport, xport *vport)
 | |
| {
 | |
|     const char *start;
 | |
|     char *tokens[4], *ds;
 | |
|     int idx;
 | |
|     xport lmport = NoPort, lvport = NoPort;
 | |
| 
 | |
|     strstart(filename, "archipelago:", &start);
 | |
| 
 | |
|     ds = g_strdup(start);
 | |
|     tokens[0] = strtok(ds, "/");
 | |
|     tokens[1] = strtok(NULL, ":");
 | |
|     tokens[2] = strtok(NULL, ":");
 | |
|     tokens[3] = strtok(NULL, "\0");
 | |
| 
 | |
|     if (!strlen(tokens[0])) {
 | |
|         error_setg(errp, "volume name must be specified first");
 | |
|         g_free(ds);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     for (idx = 1; idx < 4; idx++) {
 | |
|         if (tokens[idx] != NULL) {
 | |
|             if (strstart(tokens[idx], "mport=", NULL)) {
 | |
|                 xseg_find_port(tokens[idx], "mport=", &lmport);
 | |
|             }
 | |
|             if (strstart(tokens[idx], "vport=", NULL)) {
 | |
|                 xseg_find_port(tokens[idx], "vport=", &lvport);
 | |
|             }
 | |
|             if (strstart(tokens[idx], "segment=", NULL)) {
 | |
|                 xseg_find_segment(tokens[idx], "segment=", segment_name);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if ((lmport == -2) || (lvport == -2)) {
 | |
|         error_setg(errp, "mport and/or vport must be set");
 | |
|         g_free(ds);
 | |
|         return;
 | |
|     }
 | |
|     *volume = g_strdup(tokens[0]);
 | |
|     *mport = lmport;
 | |
|     *vport = lvport;
 | |
|     g_free(ds);
 | |
| }
 | |
| 
 | |
| static void archipelago_parse_filename(const char *filename, QDict *options,
 | |
|                                        Error **errp)
 | |
| {
 | |
|     const char *start;
 | |
|     char *volume = NULL, *segment_name = NULL;
 | |
|     xport mport = NoPort, vport = NoPort;
 | |
| 
 | |
|     if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME)
 | |
|             || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT)
 | |
|             || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT)
 | |
|             || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) {
 | |
|         error_setg(errp, "volume/mport/vport/segment and a file name may not"
 | |
|                          " be specified at the same time");
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!strstart(filename, "archipelago:", &start)) {
 | |
|         error_setg(errp, "File name must start with 'archipelago:'");
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!strlen(start) || strstart(start, "/", NULL)) {
 | |
|         error_setg(errp, "volume name must be specified");
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport);
 | |
| 
 | |
|     if (volume) {
 | |
|         qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume));
 | |
|         g_free(volume);
 | |
|     }
 | |
|     if (segment_name) {
 | |
|         qdict_put(options, ARCHIPELAGO_OPT_SEGMENT,
 | |
|                   qstring_from_str(segment_name));
 | |
|         g_free(segment_name);
 | |
|     }
 | |
|     if (mport != NoPort) {
 | |
|         qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport));
 | |
|     }
 | |
|     if (vport != NoPort) {
 | |
|         qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport));
 | |
|     }
 | |
| }
 | |
| 
 | |
| static QemuOptsList archipelago_runtime_opts = { /* Options accepted by qemu_archipelago_open() */
 | |
|     .name = "archipelago",
 | |
|     .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head),
 | |
|     .desc = {
 | |
|         {
 | |
|             .name = ARCHIPELAGO_OPT_VOLUME, /* required; open fails without it */
 | |
|             .type = QEMU_OPT_STRING,
 | |
|             .help = "Name of the volume image",
 | |
|         },
 | |
|         {
 | |
|             .name = ARCHIPELAGO_OPT_SEGMENT, /* optional; open falls back to "archipelago" */
 | |
|             .type = QEMU_OPT_STRING,
 | |
|             .help = "Name of the Archipelago shared memory segment",
 | |
|         },
 | |
|         {
 | |
|             .name = ARCHIPELAGO_OPT_MPORT, /* optional; open falls back to ARCHIPELAGO_DFL_MPORT */
 | |
|             .type = QEMU_OPT_NUMBER,
 | |
|             .help = "Archipelago mapperd port number"
 | |
|         },
 | |
|         {
 | |
|             .name = ARCHIPELAGO_OPT_VPORT, /* optional; open falls back to ARCHIPELAGO_DFL_VPORT */
 | |
|             .type = QEMU_OPT_NUMBER,
 | |
|             .help = "Archipelago vlmcd port number"
 | |
| 
 | |
|         },
 | |
|         { /* end of list */ }
 | |
|     },
 | |
| };
 | |
| 
 | |
| static int qemu_archipelago_open(BlockDriverState *bs,
 | |
|                                  QDict *options,
 | |
|                                  int bdrv_flags,
 | |
|                                  Error **errp)
 | |
| {
 | |
|     int ret = 0;
 | |
|     const char *volume, *segment_name;
 | |
|     QemuOpts *opts;
 | |
|     Error *local_err = NULL;
 | |
|     BDRVArchipelagoState *s = bs->opaque;
 | |
| 
 | |
|     opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort);
 | |
|     qemu_opts_absorb_qdict(opts, options, &local_err);
 | |
|     if (local_err) {
 | |
|         error_propagate(errp, local_err);
 | |
|         ret = -EINVAL;
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT,
 | |
|                                      ARCHIPELAGO_DFL_MPORT);
 | |
|     s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT,
 | |
|                                      ARCHIPELAGO_DFL_VPORT);
 | |
| 
 | |
|     segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT);
 | |
|     if (segment_name == NULL) {
 | |
|         s->segment_name = g_strdup("archipelago");
 | |
|     } else {
 | |
|         s->segment_name = g_strdup(segment_name);
 | |
|     }
 | |
| 
 | |
|     volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME);
 | |
|     if (volume == NULL) {
 | |
|         error_setg(errp, "archipelago block driver requires the 'volume'"
 | |
|                    " option");
 | |
|         ret = -EINVAL;
 | |
|         goto err_exit;
 | |
|     }
 | |
|     s->volname = g_strdup(volume);
 | |
| 
 | |
|     /* Initialize XSEG, join shared memory segment */
 | |
|     ret = qemu_archipelago_init(s);
 | |
|     if (ret < 0) {
 | |
|         error_setg(errp, "cannot initialize XSEG and join shared "
 | |
|                    "memory segment");
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     qemu_opts_del(opts);
 | |
|     return 0;
 | |
| 
 | |
| err_exit:
 | |
|     g_free(s->volname);
 | |
|     g_free(s->segment_name);
 | |
|     qemu_opts_del(opts);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static void qemu_archipelago_close(BlockDriverState *bs)
 | |
| {
 | |
|     int r, targetlen;
 | |
|     char *target;
 | |
|     struct xseg_request *req;
 | |
|     BDRVArchipelagoState *s = bs->opaque;
 | |
| 
 | |
|     s->stopping = true;
 | |
| 
 | |
|     qemu_mutex_lock(&s->request_mutex);
 | |
|     while (!s->th_is_signaled) {
 | |
|         qemu_cond_wait(&s->request_cond,
 | |
|                        &s->request_mutex);
 | |
|     }
 | |
|     qemu_mutex_unlock(&s->request_mutex);
 | |
|     qemu_thread_join(&s->request_th);
 | |
|     qemu_cond_destroy(&s->request_cond);
 | |
|     qemu_mutex_destroy(&s->request_mutex);
 | |
| 
 | |
|     qemu_cond_destroy(&s->archip_cond);
 | |
|     qemu_mutex_destroy(&s->archip_mutex);
 | |
| 
 | |
|     targetlen = strlen(s->volname);
 | |
|     req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
 | |
|     if (!req) {
 | |
|         archipelagolog("Cannot get XSEG request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     r = xseg_prep_request(s->xseg, req, targetlen, 0);
 | |
|     if (r < 0) {
 | |
|         xseg_put_request(s->xseg, req, s->srcport);
 | |
|         archipelagolog("Cannot prepare XSEG close request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     target = xseg_get_target(s->xseg, req);
 | |
|     memcpy(target, s->volname, targetlen);
 | |
|     req->size = req->datalen;
 | |
|     req->offset = 0;
 | |
|     req->op = X_CLOSE;
 | |
| 
 | |
|     xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
 | |
|     if (p == NoPort) {
 | |
|         xseg_put_request(s->xseg, req, s->srcport);
 | |
|         archipelagolog("Cannot submit XSEG close request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     xseg_signal(s->xseg, p);
 | |
|     wait_reply(s->xseg, s->srcport, s->port, req);
 | |
| 
 | |
|     xseg_put_request(s->xseg, req, s->srcport);
 | |
| 
 | |
| err_exit:
 | |
|     g_free(s->volname);
 | |
|     g_free(s->segment_name);
 | |
|     xseg_quit_local_signal(s->xseg, s->srcport);
 | |
|     xseg_leave_dynport(s->xseg, s->port);
 | |
|     xseg_leave(s->xseg);
 | |
| }
 | |
| 
 | |
| static int qemu_archipelago_create_volume(Error **errp, const char *volname,
 | |
|                                           char *segment_name,
 | |
|                                           uint64_t size, xport mportno,
 | |
|                                           xport vportno)
 | |
| {
 | |
|     int ret, targetlen;
 | |
|     struct xseg *xseg = NULL;
 | |
|     struct xseg_request *req;
 | |
|     struct xseg_request_clone *xclone;
 | |
|     struct xseg_port *port;
 | |
|     xport srcport = NoPort, sport = NoPort;
 | |
|     char *target;
 | |
| 
 | |
|     /* Try default values if none has been set */
 | |
|     if (mportno == (xport) -1) {
 | |
|         mportno = ARCHIPELAGO_DFL_MPORT;
 | |
|     }
 | |
| 
 | |
|     if (vportno == (xport) -1) {
 | |
|         vportno = ARCHIPELAGO_DFL_VPORT;
 | |
|     }
 | |
| 
 | |
|     if (xseg_initialize()) {
 | |
|         error_setg(errp, "Cannot initialize XSEG");
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     xseg = xseg_join("posix", segment_name,
 | |
|                      "posixfd", NULL);
 | |
| 
 | |
|     if (!xseg) {
 | |
|         error_setg(errp, "Cannot join XSEG shared memory segment");
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     port = xseg_bind_dynport(xseg);
 | |
|     srcport = port->portno;
 | |
|     init_local_signal(xseg, sport, srcport);
 | |
| 
 | |
|     req = xseg_get_request(xseg, srcport, mportno, X_ALLOC);
 | |
|     if (!req) {
 | |
|         error_setg(errp, "Cannot get XSEG request");
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     targetlen = strlen(volname);
 | |
|     ret = xseg_prep_request(xseg, req, targetlen,
 | |
|                             sizeof(struct xseg_request_clone));
 | |
|     if (ret < 0) {
 | |
|         error_setg(errp, "Cannot prepare XSEG request");
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     target = xseg_get_target(xseg, req);
 | |
|     if (!target) {
 | |
|         error_setg(errp, "Cannot get XSEG target.\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     memcpy(target, volname, targetlen);
 | |
|     xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req);
 | |
|     memset(xclone->target, 0 , XSEG_MAX_TARGETLEN);
 | |
|     xclone->targetlen = 0;
 | |
|     xclone->size = size;
 | |
|     req->offset = 0;
 | |
|     req->size = req->datalen;
 | |
|     req->op = X_CLONE;
 | |
| 
 | |
|     xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
 | |
|     if (p == NoPort) {
 | |
|         error_setg(errp, "Could not submit XSEG request");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     xseg_signal(xseg, p);
 | |
| 
 | |
|     ret = wait_reply(xseg, srcport, port, req);
 | |
|     if (ret < 0) {
 | |
|         error_setg(errp, "wait_reply() error.");
 | |
|     }
 | |
| 
 | |
|     xseg_put_request(xseg, req, srcport);
 | |
|     xseg_quit_local_signal(xseg, srcport);
 | |
|     xseg_leave_dynport(xseg, port);
 | |
|     xseg_leave(xseg);
 | |
|     return ret;
 | |
| 
 | |
| err_exit:
 | |
|     xseg_put_request(xseg, req, srcport);
 | |
|     xseg_quit_local_signal(xseg, srcport);
 | |
|     xseg_leave_dynport(xseg, port);
 | |
|     xseg_leave(xseg);
 | |
|     return -1;
 | |
| }
 | |
| 
 | |
| static int qemu_archipelago_create(const char *filename,
 | |
|                                    QemuOpts *options,
 | |
|                                    Error **errp)
 | |
| {
 | |
|     int ret = 0;
 | |
|     uint64_t total_size = 0;
 | |
|     char *volname = NULL, *segment_name = NULL;
 | |
|     const char *start;
 | |
|     xport mport = NoPort, vport = NoPort;
 | |
| 
 | |
|     if (!strstart(filename, "archipelago:", &start)) {
 | |
|         error_setg(errp, "File name must start with 'archipelago:'");
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     if (!strlen(start) || strstart(start, "/", NULL)) {
 | |
|         error_setg(errp, "volume name must be specified");
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     parse_filename_opts(filename, errp, &volname, &segment_name, &mport,
 | |
|                         &vport);
 | |
|     total_size = ROUND_UP(qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0),
 | |
|                           BDRV_SECTOR_SIZE);
 | |
| 
 | |
|     if (segment_name == NULL) {
 | |
|         segment_name = g_strdup("archipelago");
 | |
|     }
 | |
| 
 | |
|     /* Create an Archipelago volume */
 | |
|     ret = qemu_archipelago_create_volume(errp, volname, segment_name,
 | |
|                                          total_size, mport,
 | |
|                                          vport);
 | |
| 
 | |
|     g_free(volname);
 | |
|     g_free(segment_name);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static const AIOCBInfo archipelago_aiocb_info = { /* passed to qemu_aio_get() when allocating this driver's AIOCBs */
 | |
|     .aiocb_size = sizeof(ArchipelagoAIOCB), /* allocation size: the full driver-specific control block */
 | |
| };
 | |
| 
 | |
| static int archipelago_submit_request(BDRVArchipelagoState *s,
 | |
|                                         uint64_t bufidx,
 | |
|                                         size_t count,
 | |
|                                         off_t offset,
 | |
|                                         ArchipelagoAIOCB *aio_cb,
 | |
|                                         ArchipelagoSegmentedRequest *segreq,
 | |
|                                         int op)
 | |
| {
 | |
|     int ret, targetlen;
 | |
|     char *target;
 | |
|     void *data = NULL;
 | |
|     struct xseg_request *req;
 | |
|     AIORequestData *reqdata = g_new(AIORequestData, 1);
 | |
| 
 | |
|     targetlen = strlen(s->volname);
 | |
|     req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
 | |
|     if (!req) {
 | |
|         archipelagolog("Cannot get XSEG request\n");
 | |
|         goto err_exit2;
 | |
|     }
 | |
|     ret = xseg_prep_request(s->xseg, req, targetlen, count);
 | |
|     if (ret < 0) {
 | |
|         archipelagolog("Cannot prepare XSEG request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     target = xseg_get_target(s->xseg, req);
 | |
|     if (!target) {
 | |
|         archipelagolog("Cannot get XSEG target\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     memcpy(target, s->volname, targetlen);
 | |
|     req->size = count;
 | |
|     req->offset = offset;
 | |
| 
 | |
|     switch (op) {
 | |
|     case ARCHIP_OP_READ:
 | |
|         req->op = X_READ;
 | |
|         break;
 | |
|     case ARCHIP_OP_WRITE:
 | |
|         req->op = X_WRITE;
 | |
|         break;
 | |
|     case ARCHIP_OP_FLUSH:
 | |
|         req->op = X_FLUSH;
 | |
|         break;
 | |
|     }
 | |
|     reqdata->volname = s->volname;
 | |
|     reqdata->offset = offset;
 | |
|     reqdata->size = count;
 | |
|     reqdata->bufidx = bufidx;
 | |
|     reqdata->aio_cb = aio_cb;
 | |
|     reqdata->segreq = segreq;
 | |
|     reqdata->op = op;
 | |
| 
 | |
|     xseg_set_req_data(s->xseg, req, reqdata);
 | |
|     if (op == ARCHIP_OP_WRITE) {
 | |
|         data = xseg_get_data(s->xseg, req);
 | |
|         if (!data) {
 | |
|             archipelagolog("Cannot get XSEG data\n");
 | |
|             goto err_exit;
 | |
|         }
 | |
|         qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count);
 | |
|     }
 | |
| 
 | |
|     xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
 | |
|     if (p == NoPort) {
 | |
|         archipelagolog("Could not submit XSEG request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     xseg_signal(s->xseg, p);
 | |
|     return 0;
 | |
| 
 | |
| err_exit:
 | |
|     g_free(reqdata);
 | |
|     xseg_put_request(s->xseg, req, s->srcport);
 | |
|     return -EIO;
 | |
| err_exit2:
 | |
|     g_free(reqdata);
 | |
|     return -EIO;
 | |
| }
 | |
| 
 | |
| static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s,
 | |
|                                         size_t count,
 | |
|                                         off_t offset,
 | |
|                                         ArchipelagoAIOCB *aio_cb,
 | |
|                                         int op)
 | |
| {
 | |
|     int ret, segments_nr;
 | |
|     size_t pos = 0;
 | |
|     ArchipelagoSegmentedRequest *segreq;
 | |
| 
 | |
|     segreq = g_new0(ArchipelagoSegmentedRequest, 1);
 | |
| 
 | |
|     if (op == ARCHIP_OP_FLUSH) {
 | |
|         segments_nr = 1;
 | |
|     } else {
 | |
|         segments_nr = (int)(count / MAX_REQUEST_SIZE) + \
 | |
|                       ((count % MAX_REQUEST_SIZE) ? 1 : 0);
 | |
|     }
 | |
|     segreq->total = count;
 | |
|     atomic_mb_set(&segreq->ref, segments_nr);
 | |
| 
 | |
|     while (segments_nr > 1) {
 | |
|         ret = archipelago_submit_request(s, pos,
 | |
|                                             MAX_REQUEST_SIZE,
 | |
|                                             offset + pos,
 | |
|                                             aio_cb, segreq, op);
 | |
| 
 | |
|         if (ret < 0) {
 | |
|             goto err_exit;
 | |
|         }
 | |
|         count -= MAX_REQUEST_SIZE;
 | |
|         pos += MAX_REQUEST_SIZE;
 | |
|         segments_nr--;
 | |
|     }
 | |
|     ret = archipelago_submit_request(s, pos, count, offset + pos,
 | |
|                                      aio_cb, segreq, op);
 | |
| 
 | |
|     if (ret < 0) {
 | |
|         goto err_exit;
 | |
|     }
 | |
|     return 0;
 | |
| 
 | |
| err_exit:
 | |
|     segreq->failed = 1;
 | |
|     if (atomic_fetch_sub(&segreq->ref, segments_nr) == segments_nr) {
 | |
|         g_free(segreq);
 | |
|     }
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static BlockAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
 | |
|                                            int64_t sector_num,
 | |
|                                            QEMUIOVector *qiov,
 | |
|                                            int nb_sectors,
 | |
|                                            BlockCompletionFunc *cb,
 | |
|                                            void *opaque,
 | |
|                                            int op)
 | |
| {
 | |
|     ArchipelagoAIOCB *aio_cb;
 | |
|     BDRVArchipelagoState *s = bs->opaque;
 | |
|     int64_t size, off;
 | |
|     int ret;
 | |
| 
 | |
|     aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque);
 | |
|     aio_cb->cmd = op;
 | |
|     aio_cb->qiov = qiov;
 | |
| 
 | |
|     aio_cb->ret = 0;
 | |
|     aio_cb->s = s;
 | |
|     aio_cb->status = -EINPROGRESS;
 | |
| 
 | |
|     off = sector_num * BDRV_SECTOR_SIZE;
 | |
|     size = nb_sectors * BDRV_SECTOR_SIZE;
 | |
|     aio_cb->size = size;
 | |
| 
 | |
|     ret = archipelago_aio_segmented_rw(s, size, off,
 | |
|                                        aio_cb, op);
 | |
|     if (ret < 0) {
 | |
|         goto err_exit;
 | |
|     }
 | |
|     return &aio_cb->common;
 | |
| 
 | |
| err_exit:
 | |
|     error_report("qemu_archipelago_aio_rw(): I/O Error\n");
 | |
|     qemu_aio_unref(aio_cb);
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static BlockAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs,
 | |
|         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 | |
|         BlockCompletionFunc *cb, void *opaque)
 | |
| {
 | |
|     return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
 | |
|                                    opaque, ARCHIP_OP_READ);
 | |
| }
 | |
| 
 | |
| static BlockAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs,
 | |
|         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
 | |
|         BlockCompletionFunc *cb, void *opaque)
 | |
| {
 | |
|     return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
 | |
|                                    opaque, ARCHIP_OP_WRITE);
 | |
| }
 | |
| 
 | |
| static int64_t archipelago_volume_info(BDRVArchipelagoState *s)
 | |
| {
 | |
|     uint64_t size;
 | |
|     int ret, targetlen;
 | |
|     struct xseg_request *req;
 | |
|     struct xseg_reply_info *xinfo;
 | |
|     AIORequestData *reqdata = g_new(AIORequestData, 1);
 | |
| 
 | |
|     const char *volname = s->volname;
 | |
|     targetlen = strlen(volname);
 | |
|     req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
 | |
|     if (!req) {
 | |
|         archipelagolog("Cannot get XSEG request\n");
 | |
|         goto err_exit2;
 | |
|     }
 | |
|     ret = xseg_prep_request(s->xseg, req, targetlen,
 | |
|                             sizeof(struct xseg_reply_info));
 | |
|     if (ret < 0) {
 | |
|         archipelagolog("Cannot prepare XSEG request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     char *target = xseg_get_target(s->xseg, req);
 | |
|     if (!target) {
 | |
|         archipelagolog("Cannot get XSEG target\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     memcpy(target, volname, targetlen);
 | |
|     req->size = req->datalen;
 | |
|     req->offset = 0;
 | |
|     req->op = X_INFO;
 | |
| 
 | |
|     reqdata->op = ARCHIP_OP_VOLINFO;
 | |
|     reqdata->volname = volname;
 | |
|     xseg_set_req_data(s->xseg, req, reqdata);
 | |
| 
 | |
|     xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
 | |
|     if (p == NoPort) {
 | |
|         archipelagolog("Cannot submit XSEG request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     xseg_signal(s->xseg, p);
 | |
|     qemu_mutex_lock(&s->archip_mutex);
 | |
|     while (!s->is_signaled) {
 | |
|         qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
 | |
|     }
 | |
|     s->is_signaled = false;
 | |
|     qemu_mutex_unlock(&s->archip_mutex);
 | |
| 
 | |
|     xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req);
 | |
|     size = xinfo->size;
 | |
|     xseg_put_request(s->xseg, req, s->srcport);
 | |
|     g_free(reqdata);
 | |
|     s->size = size;
 | |
|     return size;
 | |
| 
 | |
| err_exit:
 | |
|     xseg_put_request(s->xseg, req, s->srcport);
 | |
| err_exit2:
 | |
|     g_free(reqdata);
 | |
|     return -EIO;
 | |
| }
 | |
| 
 | |
| static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
 | |
| {
 | |
|     int64_t ret;
 | |
|     BDRVArchipelagoState *s = bs->opaque;
 | |
| 
 | |
|     ret = archipelago_volume_info(s);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset)
 | |
| {
 | |
|     int ret, targetlen;
 | |
|     struct xseg_request *req;
 | |
|     BDRVArchipelagoState *s = bs->opaque;
 | |
|     AIORequestData *reqdata = g_new(AIORequestData, 1);
 | |
| 
 | |
|     const char *volname = s->volname;
 | |
|     targetlen = strlen(volname);
 | |
|     req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
 | |
|     if (!req) {
 | |
|         archipelagolog("Cannot get XSEG request\n");
 | |
|         goto err_exit2;
 | |
|     }
 | |
| 
 | |
|     ret = xseg_prep_request(s->xseg, req, targetlen, 0);
 | |
|     if (ret < 0) {
 | |
|         archipelagolog("Cannot prepare XSEG request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     char *target = xseg_get_target(s->xseg, req);
 | |
|     if (!target) {
 | |
|         archipelagolog("Cannot get XSEG target\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
|     memcpy(target, volname, targetlen);
 | |
|     req->offset = offset;
 | |
|     req->op = X_TRUNCATE;
 | |
| 
 | |
|     reqdata->op = ARCHIP_OP_TRUNCATE;
 | |
|     reqdata->volname = volname;
 | |
| 
 | |
|     xseg_set_req_data(s->xseg, req, reqdata);
 | |
| 
 | |
|     xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
 | |
|     if (p == NoPort) {
 | |
|         archipelagolog("Cannot submit XSEG request\n");
 | |
|         goto err_exit;
 | |
|     }
 | |
| 
 | |
|     xseg_signal(s->xseg, p);
 | |
|     qemu_mutex_lock(&s->archip_mutex);
 | |
|     while (!s->is_signaled) {
 | |
|         qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
 | |
|     }
 | |
|     s->is_signaled = false;
 | |
|     qemu_mutex_unlock(&s->archip_mutex);
 | |
|     xseg_put_request(s->xseg, req, s->srcport);
 | |
|     g_free(reqdata);
 | |
|     return 0;
 | |
| 
 | |
| err_exit:
 | |
|     xseg_put_request(s->xseg, req, s->srcport);
 | |
| err_exit2:
 | |
|     g_free(reqdata);
 | |
|     return -EIO;
 | |
| }
 | |
| 
 | |
| static QemuOptsList qemu_archipelago_create_opts = {
 | |
|     .name = "archipelago-create-opts",
 | |
|     .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head),
 | |
|     .desc = {
 | |
|         {
 | |
|             .name = BLOCK_OPT_SIZE,
 | |
|             .type = QEMU_OPT_SIZE,
 | |
|             .help = "Virtual disk size"
 | |
|         },
 | |
|         { /* end of list */ }
 | |
|     }
 | |
| };
 | |
| 
 | |
| static BlockAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs,
 | |
|         BlockCompletionFunc *cb, void *opaque)
 | |
| {
 | |
|     return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque,
 | |
|                                    ARCHIP_OP_FLUSH);
 | |
| }
 | |
| 
 | |
| static BlockDriver bdrv_archipelago = {
 | |
|     .format_name         = "archipelago",
 | |
|     .protocol_name       = "archipelago",
 | |
|     .instance_size       = sizeof(BDRVArchipelagoState),
 | |
|     .bdrv_parse_filename = archipelago_parse_filename,
 | |
|     .bdrv_file_open      = qemu_archipelago_open,
 | |
|     .bdrv_close          = qemu_archipelago_close,
 | |
|     .bdrv_create         = qemu_archipelago_create,
 | |
|     .bdrv_getlength      = qemu_archipelago_getlength,
 | |
|     .bdrv_truncate       = qemu_archipelago_truncate,
 | |
|     .bdrv_aio_readv      = qemu_archipelago_aio_readv,
 | |
|     .bdrv_aio_writev     = qemu_archipelago_aio_writev,
 | |
|     .bdrv_aio_flush      = qemu_archipelago_aio_flush,
 | |
|     .bdrv_has_zero_init  = bdrv_has_zero_init_1,
 | |
|     .create_opts         = &qemu_archipelago_create_opts,
 | |
| };
 | |
| 
 | |
| static void bdrv_archipelago_init(void)
 | |
| {
 | |
|     bdrv_register(&bdrv_archipelago);
 | |
| }
 | |
| 
 | |
| block_init(bdrv_archipelago_init);
 |