@@ -41,6 +41,7 @@
 
 #include <linux/blkdev_compat.h>
 #include <linux/task_io_accounting_ops.h>
+#include <linux/workqueue.h>
 
 #ifdef HAVE_BLK_MQ
 #include <linux/blk-mq.h>
@@ -1338,6 +1339,101 @@ zvol_wait_close(zvol_state_t *zv)
 {
 }
 
+/* Context for an add_disk() call dispatched to a workqueue */
+struct add_disk_work {
+	struct delayed_work work;
+	struct gendisk *disk;
+	int error;
+};
+
+static int
+__zvol_os_add_disk(struct gendisk *disk)
+{
+	int error = 0;
+#ifdef HAVE_ADD_DISK_RET
+	error = add_disk(disk);
+#else
+	add_disk(disk);
+#endif
+	return (error);
+}
+
+#if defined(HAVE_BDEV_FILE_OPEN_BY_PATH)
+static void
+zvol_os_add_disk_work(struct work_struct *work)
+{
+	struct add_disk_work *add_disk_work;
+	add_disk_work = container_of(work, struct add_disk_work, work.work);
+	add_disk_work->error = __zvol_os_add_disk(add_disk_work->disk);
+}
+#endif
+
+/*
+ * SPECIAL CASE:
+ *
+ * This function basically calls add_disk() from a workqueue. You may be
+ * thinking: why not just call add_disk() directly?
+ *
+ * When you call add_disk(), the zvol appears to the world. When this happens,
+ * the kernel calls disk_scan_partitions() on the zvol, which behaves
+ * differently on 6.9+ kernels:
+ *
+ * - 6.8 and older kernels -
+ * disk_scan_partitions()
+ *	handle = bdev_open_by_dev()
+ *		zvol_open()
+ *	bdev_release(handle);
+ *		zvol_release()
+ *
+ *
+ * - 6.9+ kernels -
+ * disk_scan_partitions()
+ *	file = bdev_file_open_by_dev()
+ *		zvol_open()
+ *	fput(file)
+ *	< wait for return to userspace >
+ *		zvol_release()
+ *
+ * The difference is that the bdev_release() on the 6.8 kernel is synchronous,
+ * while the fput() on the 6.9 kernel is async. More specifically, it's async
+ * in a way that has to wait until we return to userspace, since it adds the
+ * fput to the caller's task work queue with the TWA_RESUME flag set. This is
+ * not the behavior we want: we want to do things like create+destroy a zvol
+ * within a single ZFS_IOC_CREATE ioctl, and the "create" part needs to
+ * release its reference to the zvol while we're still inside the ioctl,
+ * which can't wait until we return to userspace.
+ *
+ * We can get around this because fput() has a special codepath for when it's
+ * running in a kernel thread or interrupt context. In those cases, it puts
+ * the fput on the system workqueue, which we can force to run with
+ * __flush_workqueue(). That is why we call add_disk() from a workqueue - so
+ * it runs from a kernel thread and "tricks" the fput() codepath (a simplified
+ * sketch of that codepath follows this function below).
+ *
+ * Note that __flush_workqueue() is slowly getting deprecated. This may be ok
+ * though, since our ioctl will spin on EBUSY waiting for the zvol release
+ * (via fput) to happen, which it eventually, naturally, will from the
+ * system_wq even without us explicitly calling __flush_workqueue().
+ */
+static int
+zvol_os_add_disk(struct gendisk *disk)
+{
+#if defined(HAVE_BDEV_FILE_OPEN_BY_PATH)	/* 6.9+ kernel */
+	struct add_disk_work add_disk_work;
+
+	INIT_DELAYED_WORK(&add_disk_work.work, zvol_os_add_disk_work);
+	add_disk_work.disk = disk;
+	add_disk_work.error = 0;
+
+	/* Use *_delayed_work functions since they're not GPL'd */
+	schedule_delayed_work(&add_disk_work.work, 0);
+	flush_delayed_work(&add_disk_work.work);
+
+	/* Force any deferred fput()s on the system workqueue to run now */
+	__flush_workqueue(system_wq);
+	return (add_disk_work.error);
+#else	/* <= 6.8 kernel */
+	return (__zvol_os_add_disk(disk));
+#endif
+}
+
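+/*
+ * For reference, a simplified sketch of the fput() codepaths described
+ * above, paraphrased from the kernel's fs/file_table.c (illustrative only -
+ * field and symbol names vary across kernel versions):
+ *
+ *	void fput(struct file *file)
+ *	{
+ *		if (atomic_long_dec_and_test(&file->f_count)) {
+ *			if (likely(!in_interrupt() &&
+ *			    !(current->flags & PF_KTHREAD))) {
+ *				// Normal task: queue the final release as
+ *				// task work, run on return to userspace.
+ *				if (!task_work_add(current, &file->f_task_work,
+ *				    TWA_RESUME))
+ *					return;
+ *			}
+ *			// Kernel thread or interrupt: defer the release to
+ *			// the system workqueue instead, which we can force
+ *			// to run with __flush_workqueue(system_wq).
+ *			if (llist_add(&file->f_llist, &delayed_fput_list))
+ *				schedule_delayed_work(&delayed_fput_work, 1);
+ *		}
+ *	}
+ */
+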
 /*
  * Create a block device minor node and setup the linkage between it
  * and the specified volume. Once this function returns the block
@@ -1549,11 +1645,7 @@ zvol_os_create_minor(const char *name)
 		rw_enter(&zvol_state_lock, RW_WRITER);
 		zvol_insert(zv);
 		rw_exit(&zvol_state_lock);
-#ifdef HAVE_ADD_DISK_RET
-		error = add_disk(zv->zv_zso->zvo_disk);
-#else
-		add_disk(zv->zv_zso->zvo_disk);
-#endif
+		error = zvol_os_add_disk(zv->zv_zso->zvo_disk);
 	} else {
 		ida_simple_remove(&zvol_ida, idx);
 	}