Skip to content

Commit 775b95d

Browse files
committed
drm/v3d: Clock V3D down when not in use.
My various attempts at re-enabling runtime PM have failed, so just crank the clock down when V3D is idle to reduce power consumption. Signed-off-by: Eric Anholt <[email protected]> drm/v3d: Plug dma_fence leak The irq_fence and done_fence are given a reference that is never released. The necessary dma_fence_put()s seem to have been deleted in error in an earlier commit. Fixes: 0b73676 ("drm/v3d: Clock V3D down when not in use.") Signed-off-by: Phil Elwell <[email protected]> v3d_drv: Handle missing clock more gracefully Signed-off-by: popcornmix <[email protected]> v3d_gem: Kick the clock so firmware knows we are using firmware clock interface Setting the v3d clock to a low value allows the firmware to handle DVFS in the case where the v3d hardware is not being actively used (e.g. console use). Signed-off-by: popcornmix <[email protected]> drm/v3d: Switch clock setting to new api Signed-off-by: Dom Cobley <[email protected]> drm/v3d: Convert to new clock range API Signed-off-by: Maxime Ripard <[email protected]> drm/v3d: Correct clock settng calls to new APIs There was a report that the 6.12 kernel has lower benchmark scores than 6.6. I can confirm this, and found that it started with the 6.8 kernel, which moved some code into a new file (v3d_submit.c); in two places the changes to the clock API were missed. The effect of the bug is that the v3d clock sometimes unintentionally drops to a lower rate. With this patch the benchmark scores are good again. Fixes: 8696303 Signed-off-by: Dom Cobley <[email protected]> drm/v3d: CPU job submissions shouldn't affect V3D GPU clock We can avoid calling v3d_clock_up_put and v3d_clock_up_get when a job is submitted to a CPU queue. We don't need to change the V3D core frequency to run a CPU job, as it is executed on the CPU. This way we avoid delaying timestamp CPU jobs by 4.5ms, which is the time it takes the firmware to increase the V3D core frequency. 
Fixes: fe6a858 ("drm/v3d: Correct clock settng calls to new APIs") Signed-off-by: Jose Maria Casanova Crespo <[email protected]> Reviewed-by: Maíra Canal <[email protected]>
1 parent 2a6a1cd commit 775b95d

File tree

4 files changed

+123
-3
lines changed

4 files changed

+123
-3
lines changed

drivers/gpu/drm/v3d/v3d_drv.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@
1717
#include <linux/dma-mapping.h>
1818
#include <linux/io.h>
1919
#include <linux/module.h>
20+
#include <linux/of.h>
2021
#include <linux/of_platform.h>
2122
#include <linux/platform_device.h>
2223
#include <linux/sched/clock.h>
2324
#include <linux/reset.h>
2425

2526
#include <drm/drm_drv.h>
2627
#include <drm/drm_managed.h>
28+
29+
#include <soc/bcm2835/raspberrypi-firmware.h>
30+
2731
#include <uapi/drm/v3d_drm.h>
2832

2933
#include "v3d_drv.h"
@@ -272,6 +276,8 @@ map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
272276
static int v3d_platform_drm_probe(struct platform_device *pdev)
273277
{
274278
struct device *dev = &pdev->dev;
279+
struct rpi_firmware *firmware;
280+
struct device_node *node;
275281
struct drm_device *drm;
276282
struct v3d_dev *v3d;
277283
int ret;
@@ -330,6 +336,34 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
330336
}
331337
}
332338

339+
v3d->clk = devm_clk_get(dev, NULL);
340+
if (IS_ERR_OR_NULL(v3d->clk)) {
341+
if (PTR_ERR(v3d->clk) != -EPROBE_DEFER)
342+
dev_err(dev, "Failed to get clock (%ld)\n", PTR_ERR(v3d->clk));
343+
return PTR_ERR(v3d->clk);
344+
}
345+
346+
node = rpi_firmware_find_node();
347+
if (!node)
348+
return -EINVAL;
349+
350+
firmware = rpi_firmware_get(node);
351+
of_node_put(node);
352+
if (!firmware)
353+
return -EPROBE_DEFER;
354+
355+
v3d->clk_up_rate = rpi_firmware_clk_get_max_rate(firmware,
356+
RPI_FIRMWARE_V3D_CLK_ID);
357+
rpi_firmware_put(firmware);
358+
359+
/* For downclocking, drop it to the minimum frequency we can get from
360+
* the CPRMAN clock generator dividing off our parent. The divider is
361+
* 4 bits, but ask for just higher than that so that rounding doesn't
362+
* make cprman reject our rate.
363+
*/
364+
v3d->clk_down_rate =
365+
(clk_get_rate(clk_get_parent(v3d->clk)) / (1 << 4)) + 10000;
366+
333367
if (v3d->ver < 41) {
334368
ret = map_regs(v3d, &v3d->gca_regs, "gca");
335369
if (ret)
@@ -358,6 +392,8 @@ static int v3d_platform_drm_probe(struct platform_device *pdev)
358392
ret = v3d_sysfs_init(dev);
359393
if (ret)
360394
goto drm_unregister;
395+
ret = clk_set_min_rate(v3d->clk, v3d->clk_down_rate);
396+
WARN_ON_ONCE(ret != 0);
361397

362398
return 0;
363399

drivers/gpu/drm/v3d/v3d_drv.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ struct v3d_dev {
112112
void __iomem *bridge_regs;
113113
void __iomem *gca_regs;
114114
struct clk *clk;
115+
struct delayed_work clk_down_work;
116+
unsigned long clk_up_rate, clk_down_rate;
117+
struct mutex clk_lock;
118+
u32 clk_refcount;
119+
bool clk_up;
120+
115121
struct reset_control *reset;
116122

117123
/* Virtual and DMA addresses of the single shared page table. */
@@ -606,3 +612,4 @@ int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
606612
/* v3d_sysfs.c */
607613
int v3d_sysfs_init(struct device *dev);
608614
void v3d_sysfs_destroy(struct device *dev);
615+
/* v3d_submit.c */
void v3d_submit_init(struct drm_device *dev);

drivers/gpu/drm/v3d/v3d_gem.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <linux/device.h>
55
#include <linux/dma-mapping.h>
66
#include <linux/io.h>
7+
#include <linux/clk.h>
78
#include <linux/module.h>
89
#include <linux/platform_device.h>
910
#include <linux/reset.h>
@@ -269,6 +270,8 @@ v3d_gem_init(struct drm_device *dev)
269270
if (ret)
270271
return ret;
271272

273+
v3d_submit_init(dev);
274+
272275
/* Note: We don't allocate address 0. Various bits of HW
273276
* treat 0 as special, such as the occlusion query counters
274277
* where 0 means "disabled".

drivers/gpu/drm/v3d/v3d_submit.c

Lines changed: 77 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,52 @@
55
*/
66

77
#include <drm/drm_syncobj.h>
8+
#include <linux/clk.h>
89

910
#include "v3d_drv.h"
1011
#include "v3d_regs.h"
1112
#include "v3d_trace.h"
1213

14+
static void
15+
v3d_clock_down_work(struct work_struct *work)
16+
{
17+
struct v3d_dev *v3d =
18+
container_of(work, struct v3d_dev, clk_down_work.work);
19+
int ret;
20+
21+
ret = clk_set_min_rate(v3d->clk, v3d->clk_down_rate);
22+
v3d->clk_up = false;
23+
WARN_ON_ONCE(ret != 0);
24+
}
25+
26+
static void
27+
v3d_clock_up_get(struct v3d_dev *v3d)
28+
{
29+
mutex_lock(&v3d->clk_lock);
30+
if (v3d->clk_refcount++ == 0) {
31+
cancel_delayed_work_sync(&v3d->clk_down_work);
32+
if (!v3d->clk_up) {
33+
int ret;
34+
35+
ret = clk_set_min_rate(v3d->clk, v3d->clk_up_rate);
36+
WARN_ON_ONCE(ret != 0);
37+
v3d->clk_up = true;
38+
}
39+
}
40+
mutex_unlock(&v3d->clk_lock);
41+
}
42+
43+
static void
44+
v3d_clock_up_put(struct v3d_dev *v3d)
45+
{
46+
mutex_lock(&v3d->clk_lock);
47+
if (--v3d->clk_refcount == 0) {
48+
schedule_delayed_work(&v3d->clk_down_work,
49+
msecs_to_jiffies(100));
50+
}
51+
mutex_unlock(&v3d->clk_lock);
52+
}
53+
1354
/* Takes the reservation lock on all the BOs being referenced, so that
1455
* we can attach fences and update the reservations after pushing the job
1556
* to the queue.
@@ -85,9 +126,10 @@ v3d_lookup_bos(struct drm_device *dev,
85126
}
86127

87128
static void
88-
v3d_job_free(struct kref *ref)
129+
v3d_job_free_common(struct v3d_job *job,
130+
bool is_gpu_job)
89131
{
90-
struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
132+
struct v3d_dev *v3d = job->v3d;
91133
int i;
92134

93135
if (job->bo) {
@@ -99,12 +141,31 @@ v3d_job_free(struct kref *ref)
99141
dma_fence_put(job->irq_fence);
100142
dma_fence_put(job->done_fence);
101143

144+
if (is_gpu_job)
145+
v3d_clock_up_put(v3d);
146+
102147
if (job->perfmon)
103148
v3d_perfmon_put(job->perfmon);
104149

105150
kfree(job);
106151
}
107152

153+
static void
154+
v3d_job_free(struct kref *ref)
155+
{
156+
struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
157+
158+
v3d_job_free_common(job, true);
159+
}
160+
161+
static void
162+
v3d_cpu_job_free(struct kref *ref)
163+
{
164+
struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
165+
166+
v3d_job_free_common(job, false);
167+
}
168+
108169
static void
109170
v3d_render_job_free(struct kref *ref)
110171
{
@@ -199,6 +260,8 @@ v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
199260
if (ret && ret != -ENOENT)
200261
goto fail_deps;
201262
}
263+
if (queue != V3D_CPU)
264+
v3d_clock_up_get(v3d);
202265

203266
kref_init(&job->refcount);
204267

@@ -1316,7 +1379,7 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
13161379
trace_v3d_submit_cpu_ioctl(&v3d->drm, cpu_job->job_type);
13171380

13181381
ret = v3d_job_init(v3d, file_priv, &cpu_job->base,
1319-
v3d_job_free, 0, &se, V3D_CPU);
1382+
v3d_cpu_job_free, 0, &se, V3D_CPU);
13201383
if (ret) {
13211384
v3d_job_deallocate((void *)&cpu_job);
13221385
goto fail;
@@ -1404,3 +1467,14 @@ v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
14041467

14051468
return ret;
14061469
}
1470+
1471+
void v3d_submit_init(struct drm_device *dev) {
1472+
struct v3d_dev *v3d = to_v3d_dev(dev);
1473+
1474+
mutex_init(&v3d->clk_lock);
1475+
INIT_DELAYED_WORK(&v3d->clk_down_work, v3d_clock_down_work);
1476+
1477+
/* kick the clock so firmware knows we are using firmware clock interface */
1478+
v3d_clock_up_get(v3d);
1479+
v3d_clock_up_put(v3d);
1480+
}

0 commit comments

Comments
 (0)