37
37
#include <sys/spa_impl.h>
38
38
#include <sys/zvol.h>
39
39
#include <sys/zvol_impl.h>
40
+ #include <cityhash.h>
40
41
41
42
#include <linux/blkdev_compat.h>
42
43
#include <linux/task_io_accounting_ops.h>
@@ -53,6 +54,12 @@ static unsigned int zvol_request_sync = 0;
53
54
static unsigned int zvol_prefetch_bytes = (128 * 1024 );
54
55
static unsigned long zvol_max_discard_blocks = 16384 ;
55
56
57
+ /*
58
+ * Switch taskq at each 512 MB multiple of the offset. This can be set to a lower value
59
+ * to utilize more threads for small files but may affect prefetch hits.
60
+ */
61
+ #define ZVOL_TASKQ_OFFSET_SHIFT 29
62
+
56
63
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
57
64
static unsigned int zvol_open_timeout_ms = 1000 ;
58
65
#endif
@@ -74,6 +81,7 @@ static boolean_t zvol_use_blk_mq = B_FALSE;
74
81
* read and write tests to a zvol in an NVMe pool (with 16 CPUs).
75
82
*/
76
83
static unsigned int zvol_blk_mq_blocks_per_thread = 8 ;
84
+ static unsigned int zvol_num_taskqs = 0 ;
77
85
#endif
78
86
79
87
#ifndef BLKDEV_DEFAULT_RQ
@@ -114,7 +122,11 @@ struct zvol_state_os {
114
122
boolean_t use_blk_mq ;
115
123
};
116
124
117
- static taskq_t * zvol_taskq ;
125
+ typedef struct zv_taskq {
126
+ uint_t tqs_cnt ;
127
+ taskq_t * * tqs_taskq ;
128
+ } zv_taskq_t ;
129
+ static zv_taskq_t zvol_taskqs ;
118
130
static struct ida zvol_ida ;
119
131
120
132
typedef struct zv_request_stack {
@@ -532,6 +544,17 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
532
544
}
533
545
534
546
zv_request_task_t * task ;
547
+ zv_taskq_t * ztqs = & zvol_taskqs ;
548
+ uint_t blk_mq_hw_queue = 0 ;
549
+ uint_t tq_idx ;
550
+ uint_t taskq_hash ;
551
+ #ifdef HAVE_BLK_MQ
552
+ if (rq )
553
+ blk_mq_hw_queue = rq -> mq_hctx -> queue_num ;
554
+ #endif
555
+ taskq_hash = cityhash4 ((uintptr_t )zv , offset >> ZVOL_TASKQ_OFFSET_SHIFT ,
556
+ blk_mq_hw_queue , 0 );
557
+ tq_idx = taskq_hash % ztqs -> tqs_cnt ;
535
558
536
559
if (rw == WRITE ) {
537
560
if (unlikely (zv -> zv_flags & ZVOL_RDONLY )) {
@@ -601,15 +624,15 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
601
624
zvol_discard (& zvr );
602
625
} else {
603
626
task = zv_request_task_create (zvr );
604
- taskq_dispatch_ent (zvol_taskq ,
627
+ taskq_dispatch_ent (ztqs -> tqs_taskq [ tq_idx ] ,
605
628
zvol_discard_task , task , 0 , & task -> ent );
606
629
}
607
630
} else {
608
631
if (force_sync ) {
609
632
zvol_write (& zvr );
610
633
} else {
611
634
task = zv_request_task_create (zvr );
612
- taskq_dispatch_ent (zvol_taskq ,
635
+ taskq_dispatch_ent (ztqs -> tqs_taskq [ tq_idx ] ,
613
636
zvol_write_task , task , 0 , & task -> ent );
614
637
}
615
638
}
@@ -631,7 +654,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
631
654
zvol_read (& zvr );
632
655
} else {
633
656
task = zv_request_task_create (zvr );
634
- taskq_dispatch_ent (zvol_taskq ,
657
+ taskq_dispatch_ent (ztqs -> tqs_taskq [ tq_idx ] ,
635
658
zvol_read_task , task , 0 , & task -> ent );
636
659
}
637
660
}
@@ -1598,8 +1621,40 @@ zvol_init(void)
1598
1621
zvol_actual_threads = MIN (MAX (zvol_threads , 1 ), 1024 );
1599
1622
}
1600
1623
1624
+ /*
1625
+ * Use at least 32 zvol_threads, but for many-core systems,
1626
+ * prefer 6 threads per taskq, but no more taskqs
1627
+ * than threads in them on large systems.
1628
+ *
1629
+ * taskq total
1630
+ * cpus taskqs threads threads
1631
+ * ------- ------- ------- -------
1632
+ * 1 1 32 32
1633
+ * 2 1 32 32
1634
+ * 4 1 32 32
1635
+ * 8 2 16 32
1636
+ * 16 3 11 33
1637
+ * 32 5 7 35
1638
+ * 64 8 8 64
1639
+ * 128 11 12 132
1640
+ * 256 16 16 256
1641
+ */
1642
+ zv_taskq_t * ztqs = & zvol_taskqs ;
1643
+ uint_t num_tqs = MIN (num_online_cpus (), zvol_num_taskqs );
1644
+ if (num_tqs == 0 ) {
1645
+ num_tqs = 1 + num_online_cpus () / 6 ;
1646
+ while (num_tqs * num_tqs > zvol_actual_threads )
1647
+ num_tqs -- ;
1648
+ }
1649
+ uint_t per_tq_thread = zvol_actual_threads / num_tqs ;
1650
+ if (per_tq_thread * num_tqs < zvol_actual_threads )
1651
+ per_tq_thread ++ ;
1652
+ ztqs -> tqs_cnt = num_tqs ;
1653
+ ztqs -> tqs_taskq = kmem_alloc (num_tqs * sizeof (taskq_t * ), KM_SLEEP );
1601
1654
error = register_blkdev (zvol_major , ZVOL_DRIVER );
1602
1655
if (error ) {
1656
+ kmem_free (ztqs -> tqs_taskq , ztqs -> tqs_cnt * sizeof (taskq_t * ));
1657
+ ztqs -> tqs_taskq = NULL ;
1603
1658
printk (KERN_INFO "ZFS: register_blkdev() failed %d\n" , error );
1604
1659
return (error );
1605
1660
}
@@ -1619,11 +1674,22 @@ zvol_init(void)
1619
1674
1024 );
1620
1675
}
1621
1676
#endif
1622
- zvol_taskq = taskq_create (ZVOL_DRIVER , zvol_actual_threads , maxclsyspri ,
1623
- zvol_actual_threads , INT_MAX , TASKQ_PREPOPULATE | TASKQ_DYNAMIC );
1624
- if (zvol_taskq == NULL ) {
1625
- unregister_blkdev (zvol_major , ZVOL_DRIVER );
1626
- return (- ENOMEM );
1677
+ for (uint_t i = 0 ; i < num_tqs ; i ++ ) {
1678
+ char name [32 ];
1679
+ (void ) snprintf (name , sizeof (name ), "%s_tq-%u" ,
1680
+ ZVOL_DRIVER , i );
1681
+ ztqs -> tqs_taskq [i ] = taskq_create (name , per_tq_thread ,
1682
+ maxclsyspri , per_tq_thread , INT_MAX ,
1683
+ TASKQ_PREPOPULATE | TASKQ_DYNAMIC );
1684
+ if (ztqs -> tqs_taskq [i ] == NULL ) {
1685
+ for (int j = i - 1 ; j >= 0 ; j -- )
1686
+ taskq_destroy (ztqs -> tqs_taskq [j ]);
1687
+ unregister_blkdev (zvol_major , ZVOL_DRIVER );
1688
+ kmem_free (ztqs -> tqs_taskq , ztqs -> tqs_cnt *
1689
+ sizeof (taskq_t * ));
1690
+ ztqs -> tqs_taskq = NULL ;
1691
+ return (- ENOMEM );
1692
+ }
1627
1693
}
1628
1694
1629
1695
zvol_init_impl ();
@@ -1634,9 +1700,22 @@ zvol_init(void)
1634
1700
void
1635
1701
zvol_fini (void )
1636
1702
{
1703
+ zv_taskq_t * ztqs = & zvol_taskqs ;
1637
1704
zvol_fini_impl ();
1638
1705
unregister_blkdev (zvol_major , ZVOL_DRIVER );
1639
- taskq_destroy (zvol_taskq );
1706
+
1707
+ if (ztqs -> tqs_taskq == NULL ) {
1708
+ ASSERT3U (ztqs -> tqs_cnt , = = , 0 );
1709
+ } else {
1710
+ for (uint_t i = 0 ; i < ztqs -> tqs_cnt ; i ++ ) {
1711
+ ASSERT3P (ztqs -> tqs_taskq [i ], != , NULL );
1712
+ taskq_destroy (ztqs -> tqs_taskq [i ]);
1713
+ }
1714
+ kmem_free (ztqs -> tqs_taskq , ztqs -> tqs_cnt *
1715
+ sizeof (taskq_t * ));
1716
+ ztqs -> tqs_taskq = NULL ;
1717
+ }
1718
+
1640
1719
ida_destroy (& zvol_ida );
1641
1720
}
1642
1721
@@ -1657,6 +1736,9 @@ MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
1657
1736
module_param (zvol_max_discard_blocks , ulong , 0444 );
1658
1737
MODULE_PARM_DESC (zvol_max_discard_blocks , "Max number of blocks to discard" );
1659
1738
1739
+ module_param (zvol_num_taskqs , uint , 0444 );
1740
+ MODULE_PARM_DESC (zvol_num_taskqs , "Number of zvol taskqs" );
1741
+
1660
1742
module_param (zvol_prefetch_bytes , uint , 0644 );
1661
1743
MODULE_PARM_DESC (zvol_prefetch_bytes , "Prefetch N bytes at zvol start+end" );
1662
1744
0 commit comments