@@ -87,15 +87,18 @@
  * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
  * can return EIO from the calling function.
  *
- * (2) zrele() should always be the last thing except for zil_commit()
- *     (if necessary) and ZFS_EXIT(). This is for 3 reasons:
- *     First, if it's the last reference, the vnode/znode
- *     can be freed, so the zp may point to freed memory. Second, the last
- *     reference will call zfs_zinactive(), which may induce a lot of work --
- *     pushing cached pages (which acquires range locks) and syncing out
- *     cached atime changes. Third, zfs_zinactive() may require a new tx,
- *     which could deadlock the system if you were already holding one.
- *     If you must call zrele() within a tx then use zfs_zrele_async().
+ * (2) zrele() should always be the last thing except for zil_commit() (if
+ *     necessary) and ZFS_EXIT(). This is for 3 reasons: First, if it's the
+ *     last reference, the vnode/znode can be freed, so the zp may point to
+ *     freed memory. Second, the last reference will call zfs_zinactive(),
+ *     which may induce a lot of work -- pushing cached pages (which acquires
+ *     range locks) and syncing out cached atime changes. Third,
+ *     zfs_zinactive() may require a new tx, which could deadlock the system
+ *     if you were already holding one. This deadlock occurs because the tx
+ *     currently being operated on prevents a txg from syncing, which
+ *     prevents the new tx from progressing, resulting in a deadlock. If you
+ *     must call zrele() within a tx, use zfs_zrele_async(). Note that iput()
+ *     is a synonym for zrele().
  *
  * (3) All range locks must be grabbed before calling dmu_tx_assign(),
  *     as they can span dmu_tx_assign() calls.
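To make rule (2) concrete, here is a minimal sketch of how an operation's
exit path would be ordered under it. zfs_example_op() and zfs_do_work() are
invented names for illustration only; the point is just the relative order
of zrele(), zil_commit(), and ZFS_EXIT().

/*
 * Hypothetical sketch of rule (2): everything else happens first, then
 * zrele(), then zil_commit() (if needed), then ZFS_EXIT().
 * zfs_do_work() is a placeholder, not a real ZFS function.
 */
static int
zfs_example_op(znode_t *zp)
{
	zfsvfs_t *zfsvfs = ZTOZSB(zp);
	int error;

	ZFS_ENTER(zfsvfs);	/* may return EIO from this function */
	ZFS_VERIFY_ZP(zp);	/* may return EIO from this function */

	error = zfs_do_work(zp);	/* placeholder for the real work */

	zrele(zp);	/* last use of zp; it may be freed after this */

	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zfsvfs->z_log, 0);

	ZFS_EXIT(zfsvfs);
	return (error);
}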
@@ -476,11 +479,18 @@ zfs_zrele_async(znode_t *zp)
 	ASSERT(atomic_read(&ip->i_count) > 0);
 	ASSERT(os != NULL);
 
-	if (atomic_read(&ip->i_count) == 1)
+	/*
+	 * If decrementing the count would put us at 0, we can't do it inline
+	 * here, because that would be synchronous. Instead, dispatch an iput
+	 * to run later.
+	 *
+	 * For more information on the dangers of a synchronous iput, see the
+	 * header comment of this file.
+	 */
+	if (!atomic_add_unless(&ip->i_count, -1, 1)) {
 		VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
 		    (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
-	else
-		zrele(zp);
+	}
 }
 
 /* ARGSUSED */
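The functional change is the switch from a read of i_count followed by a
separate release to a single atomic test-and-decrement.
atomic_add_unless(&ip->i_count, -1, 1) decrements i_count unless it is 1, and
returns zero only when it was 1, i.e. when this is the last reference. In
that case the final iput() is dispatched to the zrele taskq rather than run
synchronously; in every other case the call itself has already dropped the
reference, which is why the old "else zrele(zp);" branch goes away. A rough
userspace model of those semantics, written with C11 atomics purely for
illustration (the kernel's implementation differs), might look like:

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Illustrative model of atomic_add_unless(): add "a" to "*v" unless "*v"
 * equals "u"; return true iff the add was performed.  A sketch of the
 * semantics only, not the kernel implementation.
 */
static bool
atomic_add_unless_model(atomic_int *v, int a, int u)
{
	int c = atomic_load(v);

	while (c != u) {
		/* On failure, c is reloaded with the current value of *v. */
		if (atomic_compare_exchange_weak(v, &c, c + a))
			return (true);	/* add performed */
	}
	return (false);		/* *v was already u; nothing changed */
}

Compared with the old atomic_read()-then-zrele() sequence, doing the test and
the decrement in one atomic step also removes the window in which another
task could drop a reference in between, which would have made the inline
zrele() the synchronous final release this function is trying to avoid.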