On 01/04/2019 08:47, Rob Herring wrote:
This adds the initial driver for panfrost which supports Arm Mali
Midgard and Bifrost family of GPUs. Currently, only the T860 and
T760 Midgard GPUs have been tested.
FWIW, on an antique T624 (Juno) it seems to work no worse than the kbase
driver plus panfrost-nondrm, which is to say it gets far enough to prove
that the userspace definitely doesn't support T624 (kmscube manages to
show a grey background, but the GPU is constantly falling over with page
faults trying to dereference address 0 - for obvious reasons I'm not
going to get any further involved in debugging that).
A couple of discoveries and general observations below.
v2:
- Add GPU reset on job hangs (Tomeu)
- Add RuntimePM and devfreq support (Tomeu)
- Fix T760 support (Tomeu)
- Add a TODO file (Rob, Tomeu)
- Support multiple in fences (Tomeu)
- Drop support for shared fences (Tomeu)
- Fill in MMU de-init (Rob)
- Move register definitions back to single header (Rob)
- Clean-up hardcoded job submit todos (Rob)
- Implement feature setup based on features/issues (Rob)
- Add remaining Midgard DT compatible strings (Rob)
Cc: Maarten Lankhorst <[email protected]>
Cc: Maxime Ripard <[email protected]>
Cc: Sean Paul <[email protected]>
Cc: David Airlie <[email protected]>
Cc: Daniel Vetter <[email protected]>
Cc: Alyssa Rosenzweig <[email protected]>
Cc: Lyude Paul <[email protected]>
Cc: Eric Anholt <[email protected]>
Signed-off-by: Marty E. Plummer <[email protected]>
Signed-off-by: Tomeu Vizoso <[email protected]>
Signed-off-by: Rob Herring <[email protected]>
---
[...]
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
new file mode 100644
index 000000000000..227ba5202a6f
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2018 Marty E. Plummer <[email protected]> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <[email protected]> */
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
+
+#include "panfrost_device.h"
+#include "panfrost_devfreq.h"
+#include "panfrost_features.h"
+#include "panfrost_gpu.h"
+#include "panfrost_job.h"
+#include "panfrost_mmu.h"
+
+static int panfrost_clk_init(struct panfrost_device *pfdev)
+{
+ int err;
+ unsigned long rate;
+
+ pfdev->clock = devm_clk_get(pfdev->dev, NULL);
+ if (IS_ERR(pfdev->clock)) {
The DT binding says clocks are optional, but this doesn't treat them as
such.
+ dev_err(pfdev->dev, "get clock failed %ld\n", PTR_ERR(pfdev->clock));
+ return PTR_ERR(pfdev->clock);
+ }
+
+ rate = clk_get_rate(pfdev->clock);
+ dev_info(pfdev->dev, "clock rate = %lu\n", rate);
+
+ err = clk_prepare_enable(pfdev->clock);
+ if (err)
+ return err;
+
+ return 0;
+}
[...]
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
new file mode 100644
index 000000000000..57a99032bcc6
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
[...]
+static int panfrost_probe(struct platform_device *pdev)
+{
+ struct panfrost_device *pfdev;
+ struct drm_device *ddev;
+ int err;
+
+ pfdev = devm_kzalloc(&pdev->dev, sizeof(*pfdev), GFP_KERNEL);
+ if (!pfdev)
+ return -ENOMEM;
+
+ pfdev->pdev = pdev;
+ pfdev->dev = &pdev->dev;
+
+ platform_set_drvdata(pdev, pfdev);
+
+ /* Allocate and initialize the DRM device. */
+ ddev = drm_dev_alloc(&panfrost_drm_driver, &pdev->dev);
+ if (IS_ERR(ddev))
+ return PTR_ERR(ddev);
+
+ ddev->dev_private = pfdev;
+ pfdev->ddev = ddev;
+
+ spin_lock_init(&pfdev->mm_lock);
+
+ /* 4G enough for now. can be 48-bit */
+ drm_mm_init(&pfdev->mm, SZ_32M >> PAGE_SHIFT, SZ_4G);
You probably want a dma_set_mask_and_coherent() call for your 'real'
output address size somewhere - the default 32-bit mask works out OK for
RK3399, but on systems with RAM above 4GB io-pgtable will get very
unhappy about DMA bounce-buffering.
+
+ pm_runtime_use_autosuspend(pfdev->dev);
+ pm_runtime_set_autosuspend_delay(pfdev->dev, 50); /* ~3 frames */
+ pm_runtime_enable(pfdev->dev);
+
+ err = panfrost_device_init(pfdev);
+ if (err) {
+ dev_err(&pdev->dev, "Fatal error during GPU init\n");
+ goto err_out0;
+ }
+
+ err = panfrost_devfreq_init(pfdev);
+ if (err) {
+ dev_err(&pdev->dev, "Fatal error during devfreq init\n");
+ goto err_out1;
+ }
+
+ /*
+ * Register the DRM device with the core and the connectors with
+ * sysfs
+ */
+ err = drm_dev_register(ddev, 0);
+ if (err < 0)
+ goto err_out1;
+
+ return 0;
+
+err_out1:
+ panfrost_device_fini(pfdev);
+err_out0:
+ drm_dev_put(ddev);
Reloading the module after a failed probe complains about an unbalanced
pm_runtime_enable(), so I guess you need a disable somewhere around here.
+ return err;
+}
+
+static int panfrost_remove(struct platform_device *pdev)
+{
+ struct panfrost_device *pfdev = platform_get_drvdata(pdev);
+ struct drm_device *ddev = pfdev->ddev;
+
+ drm_dev_unregister(ddev);
+ pm_runtime_get_sync(pfdev->dev);
+ pm_runtime_put_sync_autosuspend(pfdev->dev);
+ pm_runtime_disable(pfdev->dev);
+ panfrost_device_fini(pfdev);
+ drm_dev_put(ddev);
+ return 0;
+}
+
+static const struct of_device_id dt_match[] = {
+ { .compatible = "arm,mali-t604" },
+ { .compatible = "arm,mali-t624" },
+ { .compatible = "arm,mali-t628" },
+ { .compatible = "arm,mali-t720" },
+ { .compatible = "arm,mali-t760" },
+ { .compatible = "arm,mali-t820" },
+ { .compatible = "arm,mali-t830" },
+ { .compatible = "arm,mali-t860" },
+ { .compatible = "arm,mali-t880" },
Any chance of resurrecting the generic "arm,mali-midgard" compatible? :P
+ {}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static const struct dev_pm_ops panfrost_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(panfrost_device_suspend, panfrost_device_resume, NULL)
+};
+
+static struct platform_driver panfrost_driver = {
+ .probe = panfrost_probe,
+ .remove = panfrost_remove,
+ .driver = {
+ .name = "panfrost",
+ .pm = &panfrost_pm_ops,
+ .of_match_table = dt_match,
+ },
+};
+module_platform_driver(panfrost_driver);
+
+MODULE_AUTHOR("Panfrost Project Developers");
+MODULE_DESCRIPTION("Panfrost DRM Driver");
+MODULE_LICENSE("GPL v2");
[...]
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
new file mode 100644
index 000000000000..867e2ba3a761
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
[...]
+static void panfrost_gpu_init_quirks(struct panfrost_device *pfdev)
+{
+ u32 quirks = 0;
+
+ if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8443) ||
+ panfrost_has_hw_issue(pfdev, HW_ISSUE_11035))
+ quirks |= SC_LS_PAUSEBUFFER_DISABLE;
+
+ if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10327))
+ quirks |= SC_SDC_DISABLE_OQ_DISCARD;
+
+ if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10797))
+ quirks |= SC_ENABLE_TEXGRD_FLAGS;
+
+ if (!panfrost_has_hw_issue(pfdev, GPUCORE_1619)) {
+ if (panfrost_model_cmp(pfdev, 0x750) < 0) /* T60x, T62x, T72x */
+ quirks |= SC_LS_ATTR_CHECK_DISABLE;
+ else if (panfrost_model_cmp(pfdev, 0x880) <= 0) /* T76x, T8xx */
+ quirks |= SC_LS_ALLOW_ATTR_TYPES;
+ }
+
+ if (panfrost_has_hw_feature(pfdev, HW_FEATURE_TLS_HASHING))
+ quirks |= SC_TLS_HASH_ENABLE;
+
+ if (quirks)
+ gpu_write(pfdev, GPU_SHADER_CONFIG, quirks);
+
+
+ quirks = gpu_read(pfdev, GPU_TILER_CONFIG);
+
+ /* Set tiler clock gate override if required */
+ if (panfrost_has_hw_issue(pfdev, HW_ISSUE_T76X_3953))
+ quirks |= TC_CLOCK_GATE_OVERRIDE;
+
+ gpu_write(pfdev, GPU_TILER_CONFIG, quirks);
+
+
+ quirks = gpu_read(pfdev, GPU_L2_MMU_CONFIG);
+
+ /* Limit read & write ID width for AXI */
+ if (panfrost_has_hw_feature(pfdev, HW_FEATURE_3BIT_EXT_RW_L2_MMU_CONFIG))
+ quirks &= ~(L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_READS |
+ L2_MMU_CONFIG_3BIT_LIMIT_EXTERNAL_WRITES);
+ else
+ quirks &= ~(L2_MMU_CONFIG_LIMIT_EXTERNAL_READS |
+ L2_MMU_CONFIG_LIMIT_EXTERNAL_WRITES);
+
+#if 0
+ if (kbdev->system_coherency == COHERENCY_ACE) {
+ /* Allow memory configuration disparity to be ignored, we
+ * optimize the use of shared memory and thus we expect
+ * some disparity in the memory configuration */
+ quirks |= L2_MMU_CONFIG_ALLOW_SNOOP_DISPARITY;
Well that sounds terrifying; I rather wish my brain had preprocessed
that #if already.
+ }
+#endif
+ gpu_write(pfdev, GPU_L2_MMU_CONFIG, quirks);
+
+ quirks = 0;
+ if ((panfrost_model_eq(pfdev, 0x860) || panfrost_model_eq(pfdev, 0x880)) &&
+ pfdev->features.revision >= 0x2000)
+ quirks |= JM_MAX_JOB_THROTTLE_LIMIT << JM_JOB_THROTTLE_LIMIT_SHIFT;
+ else if (panfrost_model_eq(pfdev, 0x6000) &&
+ pfdev->features.coherency_features == COHERENCY_ACE)
+ quirks |= (COHERENCY_ACE_LITE | COHERENCY_ACE) <<
+ JM_FORCE_COHERENCY_FEATURES_SHIFT;
Experience says you can never really trust what ID registers claim about
system integration stuff like coherency, because eventually someone will
get a tieoff wrong and make it all fall apart. If even the vendor driver
has a DT override for it you know you're on thin ice ;)
Ultimately, most of your I/O coherency behaviour will be governed by
what the DMA API thinks (based on "dma-coherent"), so if you end up with
mismatched expectations at the point coherency_features gets set up then
you're liable to have a bad time. See the arm-smmu drivers for prior
examples of handling the equivalent thing.
Robin.
+
+ if (quirks)
+ gpu_write(pfdev, GPU_JM_CONFIG, quirks);
+}
_______________________________________________
dri-devel mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/dri-devel