Skip to content

Commit 1a6ed33

Browse files
tvelocitygkurz
authored andcommitted
9p: Added virtfs option 'multidevs=remap|forbid|warn'
'warn' (default): Only log an error message (once) on host if more than one device is shared by same export, except of that just ignore this config error though. This is the default behaviour for not breaking existing installations implying that they really know what they are doing. 'forbid': Like 'warn', but except of just logging an error this also denies access of guest to additional devices. 'remap': Allows to share more than one device per export by remapping inodes from host to guest appropriately. To support multiple devices on the 9p share, and avoid qid path collisions we take the device id as input to generate a unique QID path. The lowest 48 bits of the path will be set equal to the file inode, and the top bits will be uniquely assigned based on the top 16 bits of the inode and the device id. Signed-off-by: Antonios Motakis <[email protected]> [CS: - Rebased to https://github.com/gkurz/qemu/commits/9p-next (SHA1 7fc4c49e91). - Added virtfs option 'multidevs', original patch simply did the inode remapping without being asked. - Updated hash calls to new xxhash API. - Updated docs for new option 'multidevs'. - Fixed v9fs_do_readdir() not having remapped inodes. - Log error message when running out of prefixes in qid_path_prefixmap(). - Fixed definition of QPATH_INO_MASK. - Wrapped qpp_table initialization to dedicated qpp_table_init() function. - Dropped unnecessary parantheses in qpp_lookup_func(). - Dropped unnecessary g_malloc0() result checks. ] Signed-off-by: Christian Schoenebeck <[email protected]> [groug: - Moved "multidevs" parsing to the local backend. - Added hint to invalid multidevs option error. - Turn "remap" into "x-remap". ] Signed-off-by: Greg Kurz <[email protected]>
1 parent 3b5ee9e commit 1a6ed33

File tree

8 files changed

+242
-25
lines changed

8 files changed

+242
-25
lines changed

fsdev/file-op-9p.h

+5
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ typedef struct ExtendedOps {
5959
#define V9FS_RDONLY 0x00000040
6060
#define V9FS_PROXY_SOCK_FD 0x00000080
6161
#define V9FS_PROXY_SOCK_NAME 0x00000100
62+
/*
63+
* multidevs option (either one of the two applies exclusively)
64+
*/
65+
#define V9FS_REMAP_INODES 0x00000200
66+
#define V9FS_FORBID_MULTIDEVS 0x00000400
6267

6368
#define V9FS_SEC_MASK 0x0000003C
6469

fsdev/qemu-fsdev-opts.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = {
3131
}, {
3232
.name = "readonly",
3333
.type = QEMU_OPT_BOOL,
34-
34+
}, {
35+
.name = "multidevs",
36+
.type = QEMU_OPT_STRING,
3537
}, {
3638
.name = "socket",
3739
.type = QEMU_OPT_STRING,
@@ -75,6 +77,9 @@ static QemuOptsList qemu_virtfs_opts = {
7577
}, {
7678
.name = "readonly",
7779
.type = QEMU_OPT_BOOL,
80+
}, {
81+
.name = "multidevs",
82+
.type = QEMU_OPT_STRING,
7883
}, {
7984
.name = "socket",
8085
.type = QEMU_OPT_STRING,

fsdev/qemu-fsdev.c

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ static FsDriverTable FsDrivers[] = {
5858
"writeout",
5959
"fmode",
6060
"dmode",
61+
"multidevs",
6162
"throttling.bps-total",
6263
"throttling.bps-read",
6364
"throttling.bps-write",

hw/9pfs/9p-local.c

+21
Original file line numberDiff line numberDiff line change
@@ -1483,6 +1483,7 @@ static int local_parse_opts(QemuOpts *opts, FsDriverEntry *fse, Error **errp)
14831483
{
14841484
const char *sec_model = qemu_opt_get(opts, "security_model");
14851485
const char *path = qemu_opt_get(opts, "path");
1486+
const char *multidevs = qemu_opt_get(opts, "multidevs");
14861487
Error *local_err = NULL;
14871488

14881489
if (!sec_model) {
@@ -1506,6 +1507,26 @@ static int local_parse_opts(QemuOpts *opts, FsDriverEntry *fse, Error **errp)
15061507
return -1;
15071508
}
15081509

1510+
if (multidevs) {
1511+
if (!strcmp(multidevs, "remap")) {
1512+
fse->export_flags &= ~V9FS_FORBID_MULTIDEVS;
1513+
fse->export_flags |= V9FS_REMAP_INODES;
1514+
} else if (!strcmp(multidevs, "forbid")) {
1515+
fse->export_flags &= ~V9FS_REMAP_INODES;
1516+
fse->export_flags |= V9FS_FORBID_MULTIDEVS;
1517+
} else if (!strcmp(multidevs, "warn")) {
1518+
fse->export_flags &= ~V9FS_FORBID_MULTIDEVS;
1519+
fse->export_flags &= ~V9FS_REMAP_INODES;
1520+
} else {
1521+
error_setg(&local_err, "invalid multidevs property '%s'",
1522+
multidevs);
1523+
error_append_hint(&local_err, "Valid options are: multidevs="
1524+
"[remap|forbid|warn]\n");
1525+
error_propagate(errp, local_err);
1526+
return -1;
1527+
}
1528+
}
1529+
15091530
if (!path) {
15101531
error_setg(errp, "path property not set");
15111532
return -1;

hw/9pfs/9p.c

+167-21
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "trace.h"
2727
#include "migration/blocker.h"
2828
#include "sysemu/qtest.h"
29+
#include "qemu/xxhash.h"
2930

3031
int open_fd_hw;
3132
int total_open_fd;
@@ -572,23 +573,117 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
572573
P9_STAT_MODE_NAMED_PIPE | \
573574
P9_STAT_MODE_SOCKET)
574575

575-
/* This is the algorithm from ufs in spfs */
576+
/* creative abuse of tb_hash_func7, which is based on xxhash */
577+
static uint32_t qpp_hash(QppEntry e)
578+
{
579+
return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
580+
}
581+
582+
static bool qpp_lookup_func(const void *obj, const void *userp)
583+
{
584+
const QppEntry *e1 = obj, *e2 = userp;
585+
return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
586+
}
587+
588+
static void qpp_table_remove(void *p, uint32_t h, void *up)
589+
{
590+
g_free(p);
591+
}
592+
593+
static void qpp_table_destroy(struct qht *ht)
594+
{
595+
if (!ht || !ht->map) {
596+
return;
597+
}
598+
qht_iter(ht, qpp_table_remove, NULL);
599+
qht_destroy(ht);
600+
}
601+
602+
static void qpp_table_init(struct qht *ht)
603+
{
604+
qht_init(ht, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE);
605+
}
606+
607+
/*
608+
* stat_to_qid needs to map inode number (64 bits) and device id (32 bits)
609+
* to a unique QID path (64 bits). To avoid having to map and keep track
610+
* of up to 2^64 objects, we map only the 16 highest bits of the inode plus
611+
* the device id to the 16 highest bits of the QID path. The 48 lowest bits
612+
* of the QID path equal to the lowest bits of the inode number.
613+
*
614+
* This takes advantage of the fact that inode number are usually not
615+
* random but allocated sequentially, so we have fewer items to keep
616+
* track of.
617+
*/
618+
static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf,
619+
uint64_t *path)
620+
{
621+
QppEntry lookup = {
622+
.dev = stbuf->st_dev,
623+
.ino_prefix = (uint16_t) (stbuf->st_ino >> 48)
624+
}, *val;
625+
uint32_t hash = qpp_hash(lookup);
626+
627+
val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
628+
629+
if (!val) {
630+
if (pdu->s->qp_prefix_next == 0) {
631+
/* we ran out of prefixes */
632+
error_report_once(
633+
"9p: No more prefixes available for remapping inodes from "
634+
"host to guest."
635+
);
636+
return -ENFILE;
637+
}
638+
639+
val = g_malloc0(sizeof(QppEntry));
640+
*val = lookup;
641+
642+
/* new unique inode prefix and device combo */
643+
val->qp_prefix = pdu->s->qp_prefix_next++;
644+
qht_insert(&pdu->s->qpp_table, val, hash, NULL);
645+
}
646+
647+
*path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK);
648+
return 0;
649+
}
650+
576651
static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
577652
{
653+
int err;
578654
size_t size;
579655

580-
if (pdu->s->dev_id != stbuf->st_dev) {
581-
warn_report_once(
582-
"9p: Multiple devices detected in same VirtFS export, "
583-
"which might lead to file ID collisions and severe "
584-
"misbehaviours on guest! You should use a separate "
585-
"export for each device shared from host."
586-
);
656+
if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
657+
/* map inode+device to qid path (fast path) */
658+
err = qid_path_prefixmap(pdu, stbuf, &qidp->path);
659+
if (err) {
660+
return err;
661+
}
662+
} else {
663+
if (pdu->s->dev_id != stbuf->st_dev) {
664+
if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
665+
error_report_once(
666+
"9p: Multiple devices detected in same VirtFS export. "
667+
"Access of guest to additional devices is (partly) "
668+
"denied due to virtfs option 'multidevs=forbid' being "
669+
"effective."
670+
);
671+
return -ENODEV;
672+
} else {
673+
warn_report_once(
674+
"9p: Multiple devices detected in same VirtFS export, "
675+
"which might lead to file ID collisions and severe "
676+
"misbehaviours on guest! You should either use a "
677+
"separate export for each device shared from host or "
678+
"use virtfs option 'multidevs=remap'!"
679+
);
680+
}
681+
}
682+
memset(&qidp->path, 0, sizeof(qidp->path));
683+
size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
684+
memcpy(&qidp->path, &stbuf->st_ino, size);
587685
}
588686

589-
memset(&qidp->path, 0, sizeof(qidp->path));
590-
size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
591-
memcpy(&qidp->path, &stbuf->st_ino, size);
592687
qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
593688
qidp->type = 0;
594689
if (S_ISDIR(stbuf->st_mode)) {
@@ -618,6 +713,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
618713
return 0;
619714
}
620715

716+
static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
717+
struct dirent *dent, V9fsQID *qidp)
718+
{
719+
struct stat stbuf;
720+
V9fsPath path;
721+
int err;
722+
723+
v9fs_path_init(&path);
724+
725+
err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
726+
if (err < 0) {
727+
goto out;
728+
}
729+
err = v9fs_co_lstat(pdu, &path, &stbuf);
730+
if (err < 0) {
731+
goto out;
732+
}
733+
err = stat_to_qid(pdu, &stbuf, qidp);
734+
735+
out:
736+
v9fs_path_free(&path);
737+
return err;
738+
}
739+
621740
V9fsPDU *pdu_alloc(V9fsState *s)
622741
{
623742
V9fsPDU *pdu = NULL;
@@ -1966,16 +2085,39 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
19662085
v9fs_string_free(&name);
19672086
return count;
19682087
}
1969-
/*
1970-
* Fill up just the path field of qid because the client uses
1971-
* only that. To fill the entire qid structure we will have
1972-
* to stat each dirent found, which is expensive
1973-
*/
1974-
size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
1975-
memcpy(&qid.path, &dent->d_ino, size);
1976-
/* Fill the other fields with dummy values */
1977-
qid.type = 0;
1978-
qid.version = 0;
2088+
2089+
if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2090+
/*
2091+
* dirent_to_qid() implies expensive stat call for each entry,
2092+
* we must do that here though since inode remapping requires
2093+
* the device id, which in turn might be different for
2094+
* different entries; we cannot make any assumption to avoid
2095+
* that here.
2096+
*/
2097+
err = dirent_to_qid(pdu, fidp, dent, &qid);
2098+
if (err < 0) {
2099+
v9fs_readdir_unlock(&fidp->fs.dir);
2100+
v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2101+
v9fs_string_free(&name);
2102+
return err;
2103+
}
2104+
} else {
2105+
/*
2106+
* Fill up just the path field of qid because the client uses
2107+
* only that. To fill the entire qid structure we will have
2108+
* to stat each dirent found, which is expensive. For the
2109+
* latter reason we don't call dirent_to_qid() here. Only drawback
2110+
* is that no multi-device export detection of stat_to_qid()
2111+
* would be done and provided as error to the user here. But
2112+
* user would get that error anyway when accessing those
2113+
* files/dirs through other ways.
2114+
*/
2115+
size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2116+
memcpy(&qid.path, &dent->d_ino, size);
2117+
/* Fill the other fields with dummy values */
2118+
qid.type = 0;
2119+
qid.version = 0;
2120+
}
19792121

19802122
/* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
19812123
len = pdu_marshal(pdu, 11 + count, "Qqbs",
@@ -3676,6 +3818,9 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
36763818

36773819
s->dev_id = stat.st_dev;
36783820

3821+
qpp_table_init(&s->qpp_table);
3822+
s->qp_prefix_next = 1; /* reserve 0 to detect overflow */
3823+
36793824
s->ctx.fst = &fse->fst;
36803825
fsdev_throttle_init(s->ctx.fst);
36813826

@@ -3697,6 +3842,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
36973842
fsdev_throttle_cleanup(s->ctx.fst);
36983843
}
36993844
g_free(s->tag);
3845+
qpp_table_destroy(&s->qpp_table);
37003846
g_free(s->ctx.fs_root);
37013847
}
37023848

hw/9pfs/9p.h

+12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "fsdev/9p-iov-marshal.h"
99
#include "qemu/thread.h"
1010
#include "qemu/coroutine.h"
11+
#include "qemu/qht.h"
1112

1213
enum {
1314
P9_TLERROR = 6,
@@ -235,6 +236,15 @@ struct V9fsFidState
235236
V9fsFidState *rclm_lst;
236237
};
237238

239+
#define QPATH_INO_MASK ((1ULL << 48) - 1)
240+
241+
/* QID path prefix entry, see stat_to_qid */
242+
typedef struct {
243+
dev_t dev;
244+
uint16_t ino_prefix;
245+
uint16_t qp_prefix;
246+
} QppEntry;
247+
238248
struct V9fsState
239249
{
240250
QLIST_HEAD(, V9fsPDU) free_list;
@@ -257,6 +267,8 @@ struct V9fsState
257267
V9fsConf fsconf;
258268
V9fsQID root_qid;
259269
dev_t dev_id;
270+
struct qht qpp_table;
271+
uint16_t qp_prefix_next;
260272
};
261273

262274
/* 9p2000.L open flags */

qemu-options.hx

+24-2
Original file line numberDiff line numberDiff line change
@@ -1339,15 +1339,15 @@ ETEXI
13391339

13401340
DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs,
13411341
"-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n"
1342-
" [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n"
1342+
" [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n"
13431343
"-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n"
13441344
"-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n"
13451345
"-virtfs synth,mount_tag=tag[,id=id][,readonly]\n",
13461346
QEMU_ARCH_ALL)
13471347

13481348
STEXI
13491349

1350-
@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}]
1350+
@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}]
13511351
@itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly]
13521352
@itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly]
13531353
@itemx -virtfs synth,mount_tag=@var{mount_tag}
@@ -1403,6 +1403,28 @@ Specifies the default mode for newly created directories on the host. Works
14031403
only with security models "mapped-xattr" and "mapped-file".
14041404
@item mount_tag=@var{mount_tag}
14051405
Specifies the tag name to be used by the guest to mount this export point.
1406+
@item multidevs=@var{multidevs}
1407+
Specifies how to deal with multiple devices being shared with a 9p export.
1408+
Supported behaviours are either "remap", "forbid" or "warn". The latter is
1409+
the default behaviour on which virtfs 9p expects only one device to be
1410+
shared with the same export, and if more than one device is shared and
1411+
accessed via the same 9p export then only a warning message is logged
1412+
(once) by qemu on host side. In order to avoid file ID collisions on guest
1413+
you should either create a separate virtfs export for each device to be
1414+
shared with guests (recommended way) or you might use "remap" instead which
1415+
allows you to share multiple devices with only one export instead, which is
1416+
achieved by remapping the original inode numbers from host to guest in a
1417+
way that would prevent such collisions. Remapping inodes in such use cases
1418+
is required because the original device IDs from host are never passed and
1419+
exposed on guest. Instead all files of an export shared with virtfs always
1420+
share the same device id on guest. So two files with identical inode
1421+
numbers but from actually different devices on host would otherwise cause a
1422+
file ID collision and hence potential misbehaviours on guest. "forbid" on
1423+
the other hand assumes like "warn" that only one device is shared by the
1424+
same export, however it will not only log a warning message but also
1425+
deny access to additional devices on guest. Note though that "forbid" does
1426+
currently not block all possible file access operations (e.g. readdir()
1427+
would still return entries from other devices).
14061428
@end table
14071429
ETEXI
14081430

0 commit comments

Comments
 (0)