Skip to content

Commit 437b1eb

Browse files
kwachows authored and gregkh committed
accel/ivpu: Abort all jobs after command queue unregister
commit 5bbccad upstream.

With hardware scheduler it is not expected to receive JOB_DONE notifications from NPU FW for the jobs aborted due to command queue destroy JSM command.

Remove jobs submitted to unregistered command queue from submitted_jobs_xa to avoid triggering a TDR in such case.

Add explicit submitted_jobs_lock that protects access to list of submitted jobs which is now used to find jobs to abort.

Move context abort procedure to separate work queue not to slow down handling of IPCs or DCT requests in case where job abort takes longer, especially when destruction of the last job of a specific context results in context release.

Signed-off-by: Karol Wachowski <[email protected]>
Signed-off-by: Maciej Falkowski <[email protected]>
Reviewed-by: Jacek Lawrynowicz <[email protected]>
Signed-off-by: Jacek Lawrynowicz <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
[ This backport removes all the lines from upstream commit related to the command queue UAPI, as it is not present in the 6.14 kernel and should not be backported. ]
Signed-off-by: Jacek Lawrynowicz <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 01db0e1 commit 437b1eb

File tree

6 files changed

+79
-49
lines changed

6 files changed

+79
-49
lines changed

drivers/accel/ivpu/ivpu_drv.c

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,6 @@
3636
#define DRIVER_VERSION_STR "1.0.0 " UTS_RELEASE
3737
#endif
3838

39-
static struct lock_class_key submitted_jobs_xa_lock_class_key;
40-
4139
int ivpu_dbg_mask;
4240
module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644);
4341
MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
@@ -465,26 +463,6 @@ static const struct drm_driver driver = {
465463
.major = 1,
466464
};
467465

468-
static void ivpu_context_abort_invalid(struct ivpu_device *vdev)
469-
{
470-
struct ivpu_file_priv *file_priv;
471-
unsigned long ctx_id;
472-
473-
mutex_lock(&vdev->context_list_lock);
474-
475-
xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
476-
if (!file_priv->has_mmu_faults || file_priv->aborted)
477-
continue;
478-
479-
mutex_lock(&file_priv->lock);
480-
ivpu_context_abort_locked(file_priv);
481-
file_priv->aborted = true;
482-
mutex_unlock(&file_priv->lock);
483-
}
484-
485-
mutex_unlock(&vdev->context_list_lock);
486-
}
487-
488466
static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
489467
{
490468
struct ivpu_device *vdev = arg;
@@ -498,9 +476,6 @@ static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
498476
case IVPU_HW_IRQ_SRC_IPC:
499477
ivpu_ipc_irq_thread_handler(vdev);
500478
break;
501-
case IVPU_HW_IRQ_SRC_MMU_EVTQ:
502-
ivpu_context_abort_invalid(vdev);
503-
break;
504479
case IVPU_HW_IRQ_SRC_DCT:
505480
ivpu_pm_dct_irq_thread_handler(vdev);
506481
break;
@@ -617,16 +592,21 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
617592
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
618593
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
619594
xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
620-
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
621595
INIT_LIST_HEAD(&vdev->bo_list);
622596

623597
vdev->db_limit.min = IVPU_MIN_DB;
624598
vdev->db_limit.max = IVPU_MAX_DB;
625599

600+
INIT_WORK(&vdev->context_abort_work, ivpu_context_abort_thread_handler);
601+
626602
ret = drmm_mutex_init(&vdev->drm, &vdev->context_list_lock);
627603
if (ret)
628604
goto err_xa_destroy;
629605

606+
ret = drmm_mutex_init(&vdev->drm, &vdev->submitted_jobs_lock);
607+
if (ret)
608+
goto err_xa_destroy;
609+
630610
ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
631611
if (ret)
632612
goto err_xa_destroy;

drivers/accel/ivpu/ivpu_drv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ struct ivpu_device {
137137
struct mutex context_list_lock; /* Protects user context addition/removal */
138138
struct xarray context_xa;
139139
struct xa_limit context_xa_limit;
140+
struct work_struct context_abort_work;
140141

141142
struct xarray db_xa;
142143
struct xa_limit db_limit;
@@ -145,6 +146,7 @@ struct ivpu_device {
145146
struct mutex bo_list_lock; /* Protects bo_list */
146147
struct list_head bo_list;
147148

149+
struct mutex submitted_jobs_lock; /* Protects submitted_jobs */
148150
struct xarray submitted_jobs_xa;
149151
struct ivpu_ipc_consumer job_done_consumer;
150152

drivers/accel/ivpu/ivpu_job.c

Lines changed: 66 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ static int ivpu_cmdq_fini(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cm
223223
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
224224
ret = ivpu_jsm_hws_destroy_cmdq(vdev, file_priv->ctx.id, cmdq->id);
225225
if (!ret)
226-
ivpu_dbg(vdev, JOB, "Command queue %d destroyed\n", cmdq->id);
226+
ivpu_dbg(vdev, JOB, "Command queue %d destroyed, ctx %d\n",
227+
cmdq->id, file_priv->ctx.id);
227228
}
228229

229230
ret = ivpu_jsm_unregister_db(vdev, cmdq->db_id);
@@ -324,6 +325,8 @@ void ivpu_context_abort_locked(struct ivpu_file_priv *file_priv)
324325

325326
if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_OS)
326327
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
328+
329+
file_priv->aborted = true;
327330
}
328331

329332
static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
@@ -462,23 +465,23 @@ static struct ivpu_job *ivpu_job_remove_from_submitted_jobs(struct ivpu_device *
462465
{
463466
struct ivpu_job *job;
464467

465-
xa_lock(&vdev->submitted_jobs_xa);
466-
job = __xa_erase(&vdev->submitted_jobs_xa, job_id);
468+
lockdep_assert_held(&vdev->submitted_jobs_lock);
467469

470+
job = xa_erase(&vdev->submitted_jobs_xa, job_id);
468471
if (xa_empty(&vdev->submitted_jobs_xa) && job) {
469472
vdev->busy_time = ktime_add(ktime_sub(ktime_get(), vdev->busy_start_ts),
470473
vdev->busy_time);
471474
}
472475

473-
xa_unlock(&vdev->submitted_jobs_xa);
474-
475476
return job;
476477
}
477478

478479
static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32 job_status)
479480
{
480481
struct ivpu_job *job;
481482

483+
lockdep_assert_held(&vdev->submitted_jobs_lock);
484+
482485
job = ivpu_job_remove_from_submitted_jobs(vdev, job_id);
483486
if (!job)
484487
return -ENOENT;
@@ -497,6 +500,10 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
497500
ivpu_stop_job_timeout_detection(vdev);
498501

499502
ivpu_rpm_put(vdev);
503+
504+
if (!xa_empty(&vdev->submitted_jobs_xa))
505+
ivpu_start_job_timeout_detection(vdev);
506+
500507
return 0;
501508
}
502509

@@ -505,8 +512,12 @@ void ivpu_jobs_abort_all(struct ivpu_device *vdev)
505512
struct ivpu_job *job;
506513
unsigned long id;
507514

515+
mutex_lock(&vdev->submitted_jobs_lock);
516+
508517
xa_for_each(&vdev->submitted_jobs_xa, id, job)
509518
ivpu_job_signal_and_destroy(vdev, id, DRM_IVPU_JOB_STATUS_ABORTED);
519+
520+
mutex_unlock(&vdev->submitted_jobs_lock);
510521
}
511522

512523
static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
@@ -531,15 +542,16 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
531542
goto err_unlock_file_priv;
532543
}
533544

534-
xa_lock(&vdev->submitted_jobs_xa);
545+
mutex_lock(&vdev->submitted_jobs_lock);
546+
535547
is_first_job = xa_empty(&vdev->submitted_jobs_xa);
536-
ret = __xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
537-
&file_priv->job_id_next, GFP_KERNEL);
548+
ret = xa_alloc_cyclic(&vdev->submitted_jobs_xa, &job->job_id, job, file_priv->job_limit,
549+
&file_priv->job_id_next, GFP_KERNEL);
538550
if (ret < 0) {
539551
ivpu_dbg(vdev, JOB, "Too many active jobs in ctx %d\n",
540552
file_priv->ctx.id);
541553
ret = -EBUSY;
542-
goto err_unlock_submitted_jobs_xa;
554+
goto err_unlock_submitted_jobs;
543555
}
544556

545557
ret = ivpu_cmdq_push_job(cmdq, job);
@@ -562,19 +574,21 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority)
562574
job->job_id, file_priv->ctx.id, job->engine_idx, priority,
563575
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
564576

565-
xa_unlock(&vdev->submitted_jobs_xa);
566-
577+
mutex_unlock(&vdev->submitted_jobs_lock);
567578
mutex_unlock(&file_priv->lock);
568579

569-
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW))
580+
if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_HW)) {
581+
mutex_lock(&vdev->submitted_jobs_lock);
570582
ivpu_job_signal_and_destroy(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
583+
mutex_unlock(&vdev->submitted_jobs_lock);
584+
}
571585

572586
return 0;
573587

574588
err_erase_xa:
575-
__xa_erase(&vdev->submitted_jobs_xa, job->job_id);
576-
err_unlock_submitted_jobs_xa:
577-
xa_unlock(&vdev->submitted_jobs_xa);
589+
xa_erase(&vdev->submitted_jobs_xa, job->job_id);
590+
err_unlock_submitted_jobs:
591+
mutex_unlock(&vdev->submitted_jobs_lock);
578592
err_unlock_file_priv:
579593
mutex_unlock(&file_priv->lock);
580594
ivpu_rpm_put(vdev);
@@ -745,7 +759,6 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
745759
struct vpu_jsm_msg *jsm_msg)
746760
{
747761
struct vpu_ipc_msg_payload_job_done *payload;
748-
int ret;
749762

750763
if (!jsm_msg) {
751764
ivpu_err(vdev, "IPC message has no JSM payload\n");
@@ -758,9 +771,10 @@ ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
758771
}
759772

760773
payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
761-
ret = ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
762-
if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
763-
ivpu_start_job_timeout_detection(vdev);
774+
775+
mutex_lock(&vdev->submitted_jobs_lock);
776+
ivpu_job_signal_and_destroy(vdev, payload->job_id, payload->job_status);
777+
mutex_unlock(&vdev->submitted_jobs_lock);
764778
}
765779

766780
void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
@@ -773,3 +787,36 @@ void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
773787
{
774788
ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
775789
}
790+
791+
void ivpu_context_abort_thread_handler(struct work_struct *work)
792+
{
793+
struct ivpu_device *vdev = container_of(work, struct ivpu_device, context_abort_work);
794+
struct ivpu_file_priv *file_priv;
795+
unsigned long ctx_id;
796+
struct ivpu_job *job;
797+
unsigned long id;
798+
799+
mutex_lock(&vdev->context_list_lock);
800+
xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
801+
if (!file_priv->has_mmu_faults || file_priv->aborted)
802+
continue;
803+
804+
mutex_lock(&file_priv->lock);
805+
ivpu_context_abort_locked(file_priv);
806+
mutex_unlock(&file_priv->lock);
807+
}
808+
mutex_unlock(&vdev->context_list_lock);
809+
810+
if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
811+
return;
812+
/*
813+
* In hardware scheduling mode NPU already has stopped processing jobs
814+
* and won't send us any further notifications, thus we have to free job related resources
815+
* and notify userspace
816+
*/
817+
mutex_lock(&vdev->submitted_jobs_lock);
818+
xa_for_each(&vdev->submitted_jobs_xa, id, job)
819+
if (job->file_priv->aborted)
820+
ivpu_job_signal_and_destroy(vdev, job->job_id, DRM_IVPU_JOB_STATUS_ABORTED);
821+
mutex_unlock(&vdev->submitted_jobs_lock);
822+
}

drivers/accel/ivpu/ivpu_job.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
6666

6767
void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
6868
void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
69+
void ivpu_context_abort_thread_handler(struct work_struct *work);
6970

7071
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
7172

drivers/accel/ivpu/ivpu_mmu.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -890,8 +890,7 @@ void ivpu_mmu_irq_evtq_handler(struct ivpu_device *vdev)
890890
REGV_WR32(IVPU_MMU_REG_EVTQ_CONS_SEC, vdev->mmu->evtq.cons);
891891
}
892892

893-
if (!kfifo_put(&vdev->hw->irq.fifo, IVPU_HW_IRQ_SRC_MMU_EVTQ))
894-
ivpu_err_ratelimited(vdev, "IRQ FIFO full\n");
893+
queue_work(system_wq, &vdev->context_abort_work);
895894
}
896895

897896
void ivpu_mmu_evtq_dump(struct ivpu_device *vdev)

drivers/accel/ivpu/ivpu_sysfs.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,12 @@ npu_busy_time_us_show(struct device *dev, struct device_attribute *attr, char *b
3030
struct ivpu_device *vdev = to_ivpu_device(drm);
3131
ktime_t total, now = 0;
3232

33-
xa_lock(&vdev->submitted_jobs_xa);
33+
mutex_lock(&vdev->submitted_jobs_lock);
34+
3435
total = vdev->busy_time;
3536
if (!xa_empty(&vdev->submitted_jobs_xa))
3637
now = ktime_sub(ktime_get(), vdev->busy_start_ts);
37-
xa_unlock(&vdev->submitted_jobs_xa);
38+
mutex_unlock(&vdev->submitted_jobs_lock);
3839

3940
return sysfs_emit(buf, "%lld\n", ktime_to_us(ktime_add(total, now)));
4041
}

0 commit comments

Comments
 (0)