From cb3ffc016ca26608efb73411f1366eca4dbdc3fa Mon Sep 17 00:00:00 2001 From: "Raziel K. Crowe" <84860158+CWDSYSTEMS@users.noreply.github.com> Date: Thu, 24 Mar 2022 12:31:43 +0500 Subject: [PATCH] Accommodate GPU wider returns --- drivers/gpu/drm/vc4/vc4_drv.h | 2 +- drivers/gpu/drm/vc4/vc4_plane.c | 133 ++++++++++++++++++++++---------- drivers/usb/host/xhci.c | 16 +--- 3 files changed, 96 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index abd279e1ca..7164017d35 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -386,7 +386,7 @@ struct vc4_plane_state { /* Clipped coordinates of the plane on the display. */ int crtc_x, crtc_y, crtc_w, crtc_h; - /* Clipped area being scanned from in the FB. */ + /* Clipped area being scanned from in the FB in u16.16 format */ u32 src_x, src_y; u32 src_w[2], src_h[2]; diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 0e791620d9..4595c09c78 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -179,9 +179,9 @@ static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) { - if (dst == src) + if (dst == src >> 16) return VC4_SCALING_NONE; - if (3 * dst >= 2 * src) + if (3 * dst >= 2 * (src >> 16)) return VC4_SCALING_PPF; else return VC4_SCALING_TPZ; @@ -388,14 +388,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) for (i = 0; i < num_planes; i++) vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; - /* We don't support subpixel source positioning for scaling, - * but fractional coordinates can be generated by clipping - * so just round for now - */ - vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1<<16); - vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1<<16); - vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1<<16) - vc4_state->src_x; - vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1<<16) - vc4_state->src_y; + vc4_state->src_x = state->src.x1; + vc4_state->src_y = state->src.y1; + vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x; + vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y; vc4_state->crtc_x = state->dst.x1; vc4_state->crtc_y = state->dst.y1; @@ -448,7 +444,7 @@ static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) { u32 scale, recip; - scale = (1 << 16) * src / dst; + scale = src / dst; /* The specs note that while the reciprocal would be defined * as (1<<32)/scale, ~0 is close enough. @@ -462,14 +458,48 @@ static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); } -static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) +/* phase magnitude bits */ +#define PHASE_BITS 6 + +static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst, u32 xy, int channel, int chroma_offset) { - u32 scale = (1 << 16) * src / dst; + u32 scale = src / dst; + s32 offset, offset2; + s32 phase; + + /* Start the phase at 1/2 pixel from the 1st pixel at src_x. + 1/4 pixel for YUV, plus the offset for chroma siting */ + if (channel) { + /* the phase is relative to scale_src->x, so shift it for display list's x value */ + offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1; + offset -= chroma_offset >> (17 - PHASE_BITS); + offset += -(1 << PHASE_BITS >> 2); + } else { + /* the phase is relative to scale_src->x, so shift it for display list's x value */ + offset = (xy & 0xffff) >> (16 - PHASE_BITS); + offset += -(1 << PHASE_BITS >> 1); + + /* This is a kludge to make sure the scaling factors are consitent with YUV's luma scaling. + we lose 1bit precision because of this. */ + scale &= ~1; + } + + /* There may be a also small error introduced by precision of scale. + Add half of that as a compromise */ + offset2 = src - dst * scale; + offset2 >>= 16 - PHASE_BITS; + phase = offset + (offset2 >> 1); + + /* Ensure +ve values don't touch the sign bit, then truncate negative values */ + if (phase >= 1 << PHASE_BITS) + phase = (1 << PHASE_BITS) - 1; + + phase &= SCALER_PPF_IPHASE_MASK; vc4_dlist_write(vc4_state, SCALER_PPF_AGC | VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | - VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); + VC4_SET_FIELD(phase, SCALER_PPF_IPHASE)); } static u32 vc4_lbm_size(struct drm_plane_state *state) @@ -494,7 +524,7 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ) pix_per_line = vc4_state->crtc_w; else - pix_per_line = vc4_state->src_w[0]; + pix_per_line = vc4_state->src_w[0] >> 16; if (!vc4_state->is_yuv) { if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) @@ -528,13 +558,15 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state, /* Ch0 H-PPF Word 0: Scaling Parameters */ if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { vc4_write_ppf(vc4_state, - vc4_state->src_w[channel], vc4_state->crtc_w); + vc4_state->src_w[channel], vc4_state->crtc_w, vc4_state->src_x, channel, + state->chroma_siting_h); } /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { vc4_write_ppf(vc4_state, - vc4_state->src_h[channel], vc4_state->crtc_h); + vc4_state->src_h[channel], vc4_state->crtc_h, vc4_state->src_y, channel, + state->chroma_siting_v); vc4_dlist_write(vc4_state, 0xc0c0c0c0); } @@ -585,7 +617,8 @@ static void vc4_plane_calc_load(struct drm_plane_state *state) for (i = 0; i < fb->format->num_planes; i++) { /* Even if the bandwidth/plane required for a single frame is * - * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh + * (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) * + * cpp * vrefresh * * when downscaling, we have to read more pixels per line in * the time frame reserved for a single line, so the bandwidth @@ -594,11 +627,11 @@ static void vc4_plane_calc_load(struct drm_plane_state *state) * load by this number. We're likely over-estimating the read * demand, but that's better than under-estimating it. */ - vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i], + vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16, vc4_state->crtc_h); - vc4_state->membus_load += vc4_state->src_w[i] * - vc4_state->src_h[i] * vscale_factor * - fb->format->cpp[i]; + vc4_state->membus_load += (vc4_state->src_w[i] >> 16) * + (vc4_state->src_h[i] >> 16) * + vscale_factor * fb->format->cpp[i]; vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; } @@ -751,7 +784,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, bool mix_plane_alpha; bool covers_screen; u32 scl0, scl1, pitch0; - u32 tiling, src_y; + u32 tiling, src_x, src_y; + u32 width, height; u32 hvs_format = format->hvs; unsigned int rotation; int ret, i; @@ -763,6 +797,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (ret) return ret; + width = vc4_state->src_w[0] >> 16; + height = vc4_state->src_h[0] >> 16; + /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB * and 4:4:4, scl1 should be set to scl0 so both channels of * the scaler do the same thing. For YUV, the Y plane needs @@ -783,9 +820,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane, DRM_MODE_REFLECT_Y); /* We must point to the last line when Y reflection is enabled. */ - src_y = vc4_state->src_y; + src_y = vc4_state->src_y >> 16; if (rotation & DRM_MODE_REFLECT_Y) - src_y += vc4_state->src_h[0] - 1; + src_y += height - 1; + + src_x = vc4_state->src_x >> 16; switch (base_format_mod) { case DRM_FORMAT_MOD_LINEAR: @@ -800,7 +839,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, (i ? v_subsample : 1) * fb->pitches[i]; - vc4_state->offsets[i] += vc4_state->src_x / + vc4_state->offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i]; } @@ -823,7 +862,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, * pitch * tile_h == tile_size * tiles_per_row */ u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); - u32 tiles_l = vc4_state->src_x >> tile_w_shift; + u32 tiles_l = src_x >> tile_w_shift; u32 tiles_r = tiles_w - tiles_l; u32 tiles_t = src_y >> tile_h_shift; /* Intra-tile offsets, which modify the base address (the @@ -833,7 +872,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, u32 tile_y = (src_y >> 4) & 1; u32 subtile_y = (src_y >> 2) & 3; u32 utile_y = src_y & 3; - u32 x_off = vc4_state->src_x & tile_w_mask; + u32 x_off = src_x & tile_w_mask; u32 y_off = src_y & tile_h_mask; /* When Y reflection is requested we must set the @@ -929,7 +968,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, * of the 12-pixels in that 128-bit word is the * first pixel to be used */ - u32 remaining_pixels = vc4_state->src_x % 96; + u32 remaining_pixels = src_x % 96; u32 aligned = remaining_pixels / 12; u32 last_bits = remaining_pixels % 12; @@ -951,12 +990,12 @@ static int vc4_plane_mode_set(struct drm_plane *plane, return -EINVAL; } pix_per_tile = tile_w / fb->format->cpp[0]; - x_off = (vc4_state->src_x % pix_per_tile) / + x_off = (src_x % pix_per_tile) / (i ? h_subsample : 1) * fb->format->cpp[i]; } - tile = vc4_state->src_x / pix_per_tile; + tile = src_x / pix_per_tile; vc4_state->offsets[i] += param * tile_w * tile; vc4_state->offsets[i] += src_y / @@ -975,6 +1014,24 @@ static int vc4_plane_mode_set(struct drm_plane *plane, return -EINVAL; } + /* fetch an extra pixel if we don't actually line up with the left edge. */ + if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16)) + width++; + + /* same for the right side */ + if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) && + vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16)) + width++; + + /* now for the top */ + if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16)) + height++; + + /* and the bottom */ + if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) && + vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16)) + height++; + /* Don't waste cycles mixing with plane alpha if the set alpha * is opaque or there is no per-pixel alpha information. * In any case we use the alpha property value as the fixed alpha. @@ -1017,10 +1074,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_dlist_write(vc4_state, (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | vc4_hvs4_get_alpha_blend_mode(state) | - VC4_SET_FIELD(vc4_state->src_w[0], - SCALER_POS2_WIDTH) | - VC4_SET_FIELD(vc4_state->src_h[0], - SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(width, SCALER_POS2_WIDTH) | + VC4_SET_FIELD(height, SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -1078,10 +1133,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 2: Source Image Size */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - VC4_SET_FIELD(vc4_state->src_w[0], - SCALER5_POS2_WIDTH) | - VC4_SET_FIELD(vc4_state->src_h[0], - SCALER5_POS2_HEIGHT)); + VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) | + VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -1572,6 +1625,8 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, DRM_COLOR_YCBCR_BT709, DRM_COLOR_YCBCR_LIMITED_RANGE); + drm_plane_create_chroma_siting_properties(plane, 0, 0); + if (type == DRM_PLANE_TYPE_PRIMARY) drm_plane_create_zpos_immutable_property(plane, 0); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index dc0651828f..45dfdcdc9f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1395,12 +1395,9 @@ static void xhci_unmap_temp_buf(struct usb_hcd *hcd, struct urb *urb) static int xhci_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) { - unsigned int i, maxpacket; - struct scatterlist *sg; struct xhci_hcd *xhci; xhci = hcd_to_xhci(hcd); - maxpacket = usb_endpoint_maxp(&urb->ep->desc); if (xhci_urb_suitable_for_idt(urb)) return 0; @@ -1409,16 +1406,6 @@ static int xhci_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, if (xhci_urb_temp_buffer_required(hcd, urb)) return xhci_map_temp_buffer(hcd, urb); } - - if (xhci->quirks & XHCI_VLI_SS_BULK_OUT_BUG && - usb_endpoint_is_bulk_out(&urb->ep->desc) && - urb->dev->speed >= USB_SPEED_SUPER && - urb->transfer_buffer_length != 0) { - for_each_sg(urb->sg, sg, urb->num_sgs, i) { - if (sg->length % maxpacket) - return xhci_map_temp_buffer(hcd, urb); - } - } return usb_hcd_map_urb_for_dma(hcd, urb, mem_flags); } @@ -1432,8 +1419,7 @@ static void xhci_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb) if (urb->num_sgs && (urb->transfer_flags & URB_DMA_MAP_SINGLE)) unmap_temp_buf = true; - if ((xhci->quirks & (XHCI_SG_TRB_CACHE_SIZE_QUIRK | XHCI_VLI_SS_BULK_OUT_BUG)) - && unmap_temp_buf) + if ((xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) && unmap_temp_buf) xhci_unmap_temp_buf(hcd, urb); else usb_hcd_unmap_urb_for_dma(hcd, urb);