3
0
mirror of https://github.com/Qortal/Brooklyn.git synced 2025-01-31 23:32:17 +00:00
Brooklyn/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
Raziel K. Crowe 04c1822c0a There is a moose on the mool buff
Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. 
Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! 
Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey! Ring the door. Take your seat moosey!
2022-03-15 21:13:23 +05:00

636 lines
17 KiB
C

// SPDX-License-Identifier: MIT
/*
* Copyright © 2014 Intel Corporation
*/
#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_lrc.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"
/*
 * Emit the PIPE_CONTROL based flush and/or invalidate sequence for @rq.
 *
 * @mode is a mask of EMIT_FLUSH and EMIT_INVALIDATE. A CS stall is always
 * requested; the remaining PIPE_CONTROL bits depend on @mode.
 *
 * Returns 0 on success, or the error reported by intel_ring_begin() when
 * ring space could not be reserved.
 */
int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	const bool invalidate = mode & EMIT_INVALIDATE;
	u32 bits = PIPE_CONTROL_CS_STALL;
	bool null_pc_wa, dc_wa;
	u32 *out;
	int ndw;

	if (mode & EMIT_FLUSH)
		bits |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
			PIPE_CONTROL_DEPTH_CACHE_FLUSH |
			PIPE_CONTROL_DC_FLUSH_ENABLE |
			PIPE_CONTROL_FLUSH_ENABLE;

	if (invalidate)
		bits |= PIPE_CONTROL_TLB_INVALIDATE |
			PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_VF_CACHE_INVALIDATE |
			PIPE_CONTROL_CONST_CACHE_INVALIDATE |
			PIPE_CONTROL_STATE_CACHE_INVALIDATE |
			PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_STORE_DATA_INDEX;

	/*
	 * On GEN9 a NULL pipe control has to be emitted ahead of
	 * VF_CACHE_INVALIDATE.
	 */
	null_pc_wa = invalidate && GRAPHICS_VER(rq->engine->i915) == 9;

	/* WaForGAMHang:kbl */
	dc_wa = invalidate && IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0);

	/*
	 * Each pipe control takes six dwords; the DC workaround brackets
	 * the main one with two additional pipe controls.
	 */
	ndw = 6 + (null_pc_wa ? 6 : 0) + (dc_wa ? 12 : 0);

	out = intel_ring_begin(rq, ndw);
	if (IS_ERR(out))
		return PTR_ERR(out);

	if (null_pc_wa)
		out = gen8_emit_pipe_control(out, 0, 0);

	if (dc_wa)
		out = gen8_emit_pipe_control(out,
					     PIPE_CONTROL_DC_FLUSH_ENABLE, 0);

	out = gen8_emit_pipe_control(out, bits, LRC_PPHWSP_SCRATCH_ADDR);

	if (dc_wa)
		out = gen8_emit_pipe_control(out, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(rq, out);

	return 0;
}
/*
 * Emit a four-dword MI_FLUSH_DW packet for @rq.
 *
 * The packet always performs a store-dword post-sync operation (using the
 * store-index form) aimed at LRC_PPHWSP_SCRATCH_ADDR. When @mode contains
 * EMIT_INVALIDATE the TLB invalidate bit is added, plus the BSD invalidate
 * bit on video decode engines.
 *
 * Returns 0 on success, or the error reported by intel_ring_begin().
 */
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	u32 *out;
	u32 dw0;

	out = intel_ring_begin(rq, 4);
	if (IS_ERR(out))
		return PTR_ERR(out);

	/*
	 * A command barrier is always required so that later commands,
	 * such as breadcrumb interrupts, are strictly ordered with respect
	 * to the write cache contents being flushed to memory (and hence
	 * coherent from the CPU).
	 */
	dw0 = (MI_FLUSH_DW + 1) |
	      MI_FLUSH_DW_STORE_INDEX |
	      MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		dw0 |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			dw0 |= MI_INVALIDATE_BSD;
	}

	out[0] = dw0;
	out[1] = LRC_PPHWSP_SCRATCH_ADDR;
	out[2] = 0; /* upper addr */
	out[3] = 0; /* value */

	intel_ring_advance(rq, out + 4);

	return 0;
}
/*
 * Emit the flush and/or invalidate pipe controls for @rq.
 *
 * EMIT_FLUSH and EMIT_INVALIDATE in @mode are handled independently: each
 * one reserves six dwords and emits its own PIPE_CONTROL (flush first, then
 * invalidate), both with a CS stall and a qword write through the
 * store-data-index form at LRC_PPHWSP_SCRATCH_ADDR.
 *
 * Returns 0 on success, or the error reported by intel_ring_begin().
 */
int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	if (mode & EMIT_FLUSH) {
		const u32 flush_bits =
			PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_TILE_CACHE_FLUSH |
			PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
			PIPE_CONTROL_DEPTH_CACHE_FLUSH |
			PIPE_CONTROL_DC_FLUSH_ENABLE |
			PIPE_CONTROL_FLUSH_ENABLE |
			PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_STORE_DATA_INDEX;
		u32 *out;

		out = intel_ring_begin(rq, 6);
		if (IS_ERR(out))
			return PTR_ERR(out);

		out = gen8_emit_pipe_control(out, flush_bits,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, out);
	}

	if (mode & EMIT_INVALIDATE) {
		const u32 inval_bits =
			PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
			PIPE_CONTROL_TLB_INVALIDATE |
			PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_VF_CACHE_INVALIDATE |
			PIPE_CONTROL_CONST_CACHE_INVALIDATE |
			PIPE_CONTROL_STATE_CACHE_INVALIDATE |
			PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_STORE_DATA_INDEX;
		u32 *out;

		out = intel_ring_begin(rq, 6);
		if (IS_ERR(out))
			return PTR_ERR(out);

		out = gen8_emit_pipe_control(out, inval_bits,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, out);
	}

	return 0;
}
/*
 * Build an MI_ARB_CHECK dword with bit 8 set and @state placed in bit 0.
 *
 * NOTE(review): presumably bit 8 acts as the write-enable for the
 * pre-parser disable state carried in bit 0 - confirm against the
 * MI_ARB_CHECK command definition.
 */
static u32 preparser_disable(bool state)
{
	u32 cmd = MI_ARB_CHECK;

	cmd |= 1 << 8;
	if (state)
		cmd |= 1;

	return cmd;
}
/*
 * Look up the AUX invalidation register for @engine.
 *
 * Only video decode and video enhancement engines have an entry; the
 * engine instance selects the register within its class. Any other class
 * trips GEM_BUG_ON() and yields INVALID_MMIO_REG.
 */
static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
{
	static const i915_reg_t decode_regs[] = {
		GEN12_VD0_AUX_NV,
		GEN12_VD1_AUX_NV,
		GEN12_VD2_AUX_NV,
		GEN12_VD3_AUX_NV,
	};
	static const i915_reg_t enhance_regs[] = {
		GEN12_VE0_AUX_NV,
		GEN12_VE1_AUX_NV,
	};

	switch (engine->class) {
	case VIDEO_DECODE_CLASS:
		return decode_regs[engine->instance];
	case VIDEO_ENHANCEMENT_CLASS:
		return enhance_regs[engine->instance];
	default:
		/* The string literal is always true, so this always fires. */
		GEM_BUG_ON("unknown aux_inv reg\n");
		return INVALID_MMIO_REG;
	}
}
/*
 * Write a four-dword sequence at @cs that loads AUX_INV into @inv_reg via
 * a single-register MI_LOAD_REGISTER_IMM, followed by an MI_NOOP.
 *
 * Returns the position just past the emitted dwords.
 */
static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
{
	cs[0] = MI_LOAD_REGISTER_IMM(1);
	cs[1] = i915_mmio_reg_offset(inv_reg);
	cs[2] = AUX_INV;
	cs[3] = MI_NOOP;

	return cs + 4;
}
/*
 * Emit the flush and/or invalidate sequence for @rq.
 *
 * For EMIT_FLUSH a single six-dword pipe control is emitted through
 * gen12_emit_pipe_control(). For EMIT_INVALIDATE twelve dwords are
 * emitted: a pipe control and an AUX table invalidation, bracketed by a
 * pre-parser disable and re-enable.
 *
 * Returns 0 on success, or the error reported by intel_ring_begin().
 */
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	if (mode & EMIT_FLUSH) {
		const u32 flush_bits =
			PIPE_CONTROL_TILE_CACHE_FLUSH |
			PIPE_CONTROL_FLUSH_L3 |
			PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
			PIPE_CONTROL_DEPTH_CACHE_FLUSH |
			/* Wa_1409600907:tgl,adl-p */
			PIPE_CONTROL_DEPTH_STALL |
			PIPE_CONTROL_DC_FLUSH_ENABLE |
			PIPE_CONTROL_FLUSH_ENABLE |
			PIPE_CONTROL_STORE_DATA_INDEX |
			PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_CS_STALL;
		u32 *out;

		out = intel_ring_begin(rq, 6);
		if (IS_ERR(out))
			return PTR_ERR(out);

		out = gen12_emit_pipe_control(out,
					      PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
					      flush_bits,
					      LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, out);
	}

	if (mode & EMIT_INVALIDATE) {
		const u32 inval_bits =
			PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
			PIPE_CONTROL_TLB_INVALIDATE |
			PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_VF_CACHE_INVALIDATE |
			PIPE_CONTROL_CONST_CACHE_INVALIDATE |
			PIPE_CONTROL_STATE_CACHE_INVALIDATE |
			PIPE_CONTROL_STORE_DATA_INDEX |
			PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_CS_STALL;
		u32 *out;

		/* 1 (disable) + 6 (pipe control) + 4 (aux inv) + 1 (enable) */
		out = intel_ring_begin(rq, 8 + 4);
		if (IS_ERR(out))
			return PTR_ERR(out);

		/*
		 * Stop the pre-parser from running ahead of the TLB
		 * invalidate and picking up a stale page for the batch
		 * buffer / request payload.
		 */
		*out++ = preparser_disable(true);

		out = gen8_emit_pipe_control(out, inval_bits,
					     LRC_PPHWSP_SCRATCH_ADDR);

		/* hsdes: 1809175790 */
		out = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, out);

		*out++ = preparser_disable(false);
		intel_ring_advance(rq, out);
	}

	return 0;
}
/*
 * Emit an MI_FLUSH_DW based flush for @rq.
 *
 * With EMIT_INVALIDATE in @mode the flush also invalidates the TLB (and
 * BSD on video decode engines), is bracketed by a pre-parser disable and
 * re-enable, and - unless the engine mask only covers BCS0 - is followed
 * by a load-register-immediate writing AUX_INV to the AUX invalidation
 * register of every engine in the mask.
 *
 * Returns 0 on success, or the error reported by intel_ring_begin().
 */
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	const bool invalidate = mode & EMIT_INVALIDATE;
	intel_engine_mask_t aux_mask = 0;
	u32 ndw = 4; /* the MI_FLUSH_DW packet itself */
	u32 dw0, *out;

	if (invalidate) {
		/* one dword each to disable and re-enable the pre-parser */
		ndw += 2;
		aux_mask = rq->engine->mask & ~BIT(BCS0);
	}

	/* LRI header + (register, value) pair per engine + trailing NOOP */
	if (aux_mask)
		ndw += 2 * hweight32(aux_mask) + 2;

	out = intel_ring_begin(rq, ndw);
	if (IS_ERR(out))
		return PTR_ERR(out);

	if (invalidate)
		*out++ = preparser_disable(true);

	/*
	 * A command barrier is always required so that later commands,
	 * such as breadcrumb interrupts, are strictly ordered with respect
	 * to the write cache contents being flushed to memory (and hence
	 * coherent from the CPU).
	 */
	dw0 = (MI_FLUSH_DW + 1) |
	      MI_FLUSH_DW_STORE_INDEX |
	      MI_FLUSH_DW_OP_STOREDW;

	if (invalidate) {
		dw0 |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			dw0 |= MI_INVALIDATE_BSD;
	}

	*out++ = dw0;
	*out++ = LRC_PPHWSP_SCRATCH_ADDR;
	*out++ = 0; /* upper addr */
	*out++ = 0; /* value */

	if (aux_mask) { /* hsdes: 1809175790 */
		struct intel_engine_cs *it;
		unsigned int tmp;

		*out++ = MI_LOAD_REGISTER_IMM(hweight32(aux_mask));
		for_each_engine_masked(it, rq->engine->gt, aux_mask, tmp) {
			*out++ = i915_mmio_reg_offset(aux_inv_reg(it));
			*out++ = AUX_INV;
		}
		*out++ = MI_NOOP;
	}

	if (invalidate)
		*out++ = preparser_disable(false);

	intel_ring_advance(rq, out);

	return 0;
}
/*
 * Return the GGTT offset of @engine's status page plus
 * I915_GEM_HWS_PREEMPT_ADDR.
 */
static u32 preempt_address(struct intel_engine_cs *engine)
{
	u32 hws_base = i915_ggtt_offset(engine->status_page.vma);

	return hws_base + I915_GEM_HWS_PREEMPT_ADDR;
}
/*
 * Compute the offset used when writing @rq's seqno: the page-aligned part
 * of the timeline's hwsp_offset combined with the in-page offset of
 * rq->hwsp_seqno.
 */
static u32 hwsp_offset(const struct i915_request *rq)
{
	/* The timeline is fixed until the request has been executed. */
	const struct intel_timeline *timeline =
		rcu_dereference_protected(rq->timeline,
					  !i915_request_signaled(rq));
	u32 page_base, in_page;

	/* See the comment in i915_request_active_seqno(). */
	page_base = page_mask_bits(timeline->hwsp_offset);
	in_page = offset_in_page(rq->hwsp_seqno);

	return page_base + in_page;
}
/*
 * Emit the initial breadcrumb for @rq, if its timeline uses one.
 *
 * The breadcrumb stores (seqno - 1) at hwsp_offset(rq) and is followed by
 * an arbitration check. Afterwards rq->infix is moved to the new ring
 * position and I915_FENCE_FLAG_INITIAL_BREADCRUMB is set on the fence.
 *
 * Returns 0 on success (including when the timeline has no initial
 * breadcrumb), or the error reported by intel_ring_begin().
 */
int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;

	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));

	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
		return 0;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs[0] = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	cs[1] = hwsp_offset(rq);
	cs[2] = 0;
	cs[3] = rq->fence.seqno - 1;

	/*
	 * Give preemption a chance before the payload even begins.
	 *
	 * From here on i915_request_started() reports true, even if the
	 * request is subsequently preempted and is no longer running.
	 *
	 * Preemption processing relies on i915_request_started() to tell
	 * whether the request is inside the user payload or still spinning
	 * on a kernel semaphore (or earlier). No-preemption requests may be
	 * preempted on the semaphore ahead of the user payload, but not
	 * once the request has started.
	 *
	 * GPU hang handling uses i915_request_started() in the same way to
	 * decide whether the user's payload was guilty, in which case the
	 * request is banned. A request that has not started is assumed to
	 * be unharmed, an innocent victim of another's hang.
	 */
	cs[4] = MI_NOOP;
	cs[5] = MI_ARB_CHECK;
	cs += 6;

	intel_ring_advance(rq, cs);

	/* Remember where the request's payload now begins. */
	rq->infix = intel_ring_offset(rq, cs);

	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

	return 0;
}
/*
 * Emit a batch buffer start at @offset with arbitration disabled.
 *
 * Four dwords are written: MI_ARB_ON_OFF (disable) and a three-dword
 * MI_BATCH_BUFFER_START_GEN8. Bit 8 of the batch start command is set
 * unless @flags contains I915_DISPATCH_SECURE. @len is not used.
 *
 * Returns 0 on success, or the error reported by intel_ring_begin().
 */
int gen8_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	const u32 nonsecure_bit = (flags & I915_DISPATCH_SECURE) ? 0 : BIT(8);
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * WaDisableCtxRestoreArbitration:bdw,chv
	 *
	 * MI_ARB_ENABLE is performed more often than strictly necessary
	 * (in particular on every gen that does not need the w/a at all!).
	 * Making sure arbitration is enabled on every switch into this
	 * context (both ordinary and for preemption) would be enough.
	 * However, gen8 has another w/a that forbids preempting inside
	 * GPGPU execution, so arbitration stays disabled for gen8 batches.
	 * It is re-enabled before the request is closed
	 * (engine->emit_fini_breadcrumb).
	 */
	cs[0] = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* FIXME(BDW+): Address space and security selectors. */
	cs[1] = MI_BATCH_BUFFER_START_GEN8 | nonsecure_bit;
	cs[2] = lower_32_bits(offset);
	cs[3] = upper_32_bits(offset);

	intel_ring_advance(rq, cs + 4);

	return 0;
}
/*
 * Emit a batch buffer start at @offset for @rq.
 *
 * Requests flagged no-preempt are handed to gen8_emit_bb_start_noarb().
 * Otherwise six dwords are written: arbitration is enabled, the batch is
 * started, and arbitration is disabled again afterwards, with an MI_NOOP
 * as the final dword. Bit 8 of the batch start command is set unless
 * @flags contains I915_DISPATCH_SECURE.
 *
 * Returns 0 on success, or the error reported by intel_ring_begin().
 */
int gen8_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	const u32 nonsecure_bit = (flags & I915_DISPATCH_SECURE) ? 0 : BIT(8);
	u32 *cs;

	if (unlikely(i915_request_has_nopreempt(rq)))
		return gen8_emit_bb_start_noarb(rq, offset, len, flags);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs[0] = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	cs[1] = MI_BATCH_BUFFER_START_GEN8 | nonsecure_bit;
	cs[2] = lower_32_bits(offset);
	cs[3] = upper_32_bits(offset);

	cs[4] = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	cs[5] = MI_NOOP;

	intel_ring_advance(rq, cs + 6);

	return 0;
}
/*
 * Debug check: the request must be unwindable without appearing to move
 * forwards, i.e. the ring direction from rq->head to rq->wa_tail must be
 * strictly positive.
 */
static void assert_request_valid(struct i915_request *rq)
{
	GEM_BUG_ON(!(intel_ring_direction(rq->ring,
					  rq->wa_tail,
					  rq->head) > 0));
}
/*
 * Every request ends with two extra dwords that serve as a workaround for
 * not being allowed to do a lite restore with HEAD==TAIL
 * (WaIdleLiteRestore). The first of them is an MI_ARB_CHECK, the second an
 * MI_NOOP. rq->wa_tail records the ring offset just past them.
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
	/* Guarantee at least one preemption point in every request. */
	cs[0] = MI_ARB_CHECK;
	cs[1] = MI_NOOP;
	cs += 2;

	rq->wa_tail = intel_ring_offset(rq, cs);

	/* The whole request has to fit in less than half the ring. */
	assert_request_valid(rq);

	return cs;
}
/*
 * Write the six-dword preemption busywait at @cs: an MI_ARB_CHECK followed
 * by a polling MI_SEMAPHORE_WAIT (global GTT, SAD_EQ_SDD) that references
 * preempt_address() of the request's engine, padded with an MI_NOOP.
 *
 * Returns the position just past the emitted dwords.
 */
static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	cs[0] = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	cs[1] = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	cs[2] = 0;
	cs[3] = preempt_address(rq->engine);
	cs[4] = 0;
	cs[5] = MI_NOOP;

	return cs + 6;
}
/*
 * Finish a request's breadcrumb: raise the user interrupt, re-enable
 * arbitration and, when the engine has semaphores and GuC submission is
 * not in use, append the preemption busywait. rq->tail is set to the
 * resulting ring offset before the workaround tail is emitted.
 *
 * Returns the position just past everything emitted.
 */
static __always_inline u32 *
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	struct intel_engine_cs *engine = rq->engine;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	if (intel_engine_has_semaphores(engine)) {
		if (!intel_uc_uses_guc_submission(&engine->gt->uc))
			cs = emit_preempt_busywait(rq, cs);
	}

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}
/*
 * Emit a GGTT write of the request's fence seqno to hwsp_offset(rq), with
 * no extra flags. Returns the position just past the emitted dwords.
 */
static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);

	return cs;
}
/*
 * Emit the final breadcrumb: the seqno write followed by the common gen8
 * breadcrumb tail. Returns the position just past the emitted dwords.
 */
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	cs = emit_xcs_breadcrumb(rq, cs);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
/*
 * Emit the final breadcrumb: a cache-flushing pipe control, then a
 * separate seqno write (with flush enable and CS stall) to hwsp_offset(rq),
 * then the common gen8 breadcrumb tail.
 *
 * Returns the position just past the emitted dwords.
 */
u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	const u32 flush_bits = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
			       PIPE_CONTROL_DEPTH_CACHE_FLUSH |
			       PIPE_CONTROL_DC_FLUSH_ENABLE;
	const u32 write_bits = PIPE_CONTROL_FLUSH_ENABLE |
			       PIPE_CONTROL_CS_STALL;

	cs = gen8_emit_pipe_control(cs, flush_bits, 0);

	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
	cs = gen8_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq),
				      write_bits);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
/*
 * Emit the final breadcrumb: one pipe control that both flushes the
 * caches and writes the fence seqno to hwsp_offset(rq), followed by the
 * common gen8 breadcrumb tail.
 *
 * Returns the position just past the emitted dwords.
 */
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	const u32 bits = PIPE_CONTROL_CS_STALL |
			 PIPE_CONTROL_TILE_CACHE_FLUSH |
			 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
			 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
			 PIPE_CONTROL_DC_FLUSH_ENABLE |
			 PIPE_CONTROL_FLUSH_ENABLE;

	cs = gen8_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq),
				      bits);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
/*
* Note that the CS instruction pre-parser will not stall on the breadcrumb
* flush and will continue pre-fetching the instructions after it before the
* memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
* BB_START/END instructions, so, even though we might pre-fetch the pre-amble
* of the next request before the memory has been flushed, we're guaranteed that
* we won't access the batch itself too early.
* However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
* so, if the current request is modifying an instruction in the next request on
* the same intel_context, we might pre-fetch and then execute the pre-update
* instruction. To avoid this, the users of self-modifying code should either
* disable the parser around the code emitting the memory writes, via a new flag
* added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
* the in-kernel use-cases we've opted to use a separate context, see
* reloc_gpu() as an example.
* All the above applies only to the instructions themselves. Non-inline data
* used by the instructions is not pre-fetched.
*/
/*
 * Write the six-dword preemption busywait at @cs: an MI_ARB_CHECK followed
 * by a polling MI_SEMAPHORE_WAIT_TOKEN (global GTT, SAD_EQ_SDD) that
 * references preempt_address() of the request's engine.
 *
 * Returns the position just past the emitted dwords.
 */
static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	cs[0] = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	cs[1] = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	cs[2] = 0;
	cs[3] = preempt_address(rq->engine);
	cs[4] = 0;
	cs[5] = 0;

	return cs + 6;
}
/*
 * Finish a request's breadcrumb: raise the user interrupt, re-enable
 * arbitration and, when the engine has semaphores and GuC submission is
 * not in use, append the gen12 preemption busywait. rq->tail is set to the
 * resulting ring offset before the workaround tail is emitted.
 *
 * Returns the position just past everything emitted.
 */
static __always_inline u32 *
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	struct intel_engine_cs *engine = rq->engine;

	*cs++ = MI_USER_INTERRUPT;
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	if (intel_engine_has_semaphores(engine)) {
		if (!intel_uc_uses_guc_submission(&engine->gt->uc))
			cs = gen12_emit_preempt_busywait(rq, cs);
	}

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}
/*
 * Emit the final breadcrumb: a flush dword (all arguments zero), then the
 * seqno write, then the gen12 breadcrumb tail.
 *
 * Returns the position just past the emitted dwords.
 */
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	/* XXX Stalling flush before seqno write; post-sync not */
	cs = __gen8_emit_flush_dw(cs, 0, 0, 0);
	cs = emit_xcs_breadcrumb(rq, cs);

	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
/*
 * Emit the final breadcrumb: one pipe control (with the HDC pipeline flush
 * set in its first flags word) that flushes the caches and writes the
 * fence seqno to hwsp_offset(rq), followed by the gen12 breadcrumb tail.
 *
 * Returns the position just past the emitted dwords.
 */
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	const u32 bits = PIPE_CONTROL_CS_STALL |
			 PIPE_CONTROL_TILE_CACHE_FLUSH |
			 PIPE_CONTROL_FLUSH_L3 |
			 PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
			 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
			 /* Wa_1409600907:tgl */
			 PIPE_CONTROL_DEPTH_STALL |
			 PIPE_CONTROL_DC_FLUSH_ENABLE |
			 PIPE_CONTROL_FLUSH_ENABLE;

	cs = gen12_emit_ggtt_write_rcs(cs, rq->fence.seqno, hwsp_offset(rq),
				       PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
				       bits);

	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}