Skip to content

Commit

Permalink
Accommodate GPU wider returns
Browse files Browse the repository at this point in the history
  • Loading branch information
Raziel K. Crowe authored and Raziel K. Crowe committed Mar 24, 2022
1 parent 9fd22d5 commit cb3ffc0
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 55 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/vc4/vc4_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ struct vc4_plane_state {

/* Clipped coordinates of the plane on the display. */
int crtc_x, crtc_y, crtc_w, crtc_h;
/* Clipped area being scanned from in the FB. */
/* Clipped area being scanned from in the FB in u16.16 format */
u32 src_x, src_y;

u32 src_w[2], src_h[2];
Expand Down
133 changes: 94 additions & 39 deletions drivers/gpu/drm/vc4/vc4_plane.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,9 @@ static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)

static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
if (dst == src)
if (dst == src >> 16)
return VC4_SCALING_NONE;
if (3 * dst >= 2 * src)
if (3 * dst >= 2 * (src >> 16))
return VC4_SCALING_PPF;
else
return VC4_SCALING_TPZ;
Expand Down Expand Up @@ -388,14 +388,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
for (i = 0; i < num_planes; i++)
vc4_state->offsets[i] = bo->paddr + fb->offsets[i];

/* We don't support subpixel source positioning for scaling,
* but fractional coordinates can be generated by clipping
* so just round for now
*/
vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1<<16);
vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1<<16);
vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1<<16) - vc4_state->src_x;
vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1<<16) - vc4_state->src_y;
vc4_state->src_x = state->src.x1;
vc4_state->src_y = state->src.y1;
vc4_state->src_w[0] = state->src.x2 - vc4_state->src_x;
vc4_state->src_h[0] = state->src.y2 - vc4_state->src_y;

vc4_state->crtc_x = state->dst.x1;
vc4_state->crtc_y = state->dst.y1;
Expand Down Expand Up @@ -448,7 +444,7 @@ static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
u32 scale, recip;

scale = (1 << 16) * src / dst;
scale = src / dst;

/* The specs note that while the reciprocal would be defined
* as (1<<32)/scale, ~0 is close enough.
Expand All @@ -462,14 +458,48 @@ static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
/* phase magnitude bits */
#define PHASE_BITS 6

static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst, u32 xy, int channel, int chroma_offset)
{
u32 scale = (1 << 16) * src / dst;
u32 scale = src / dst;
s32 offset, offset2;
s32 phase;

/* Start the phase at 1/2 pixel from the 1st pixel at src_x.
1/4 pixel for YUV, plus the offset for chroma siting */
if (channel) {
/* the phase is relative to scale_src->x, so shift it for display list's x value */
offset = (xy & 0x1ffff) >> (16 - PHASE_BITS) >> 1;
offset -= chroma_offset >> (17 - PHASE_BITS);
offset += -(1 << PHASE_BITS >> 2);
} else {
/* the phase is relative to scale_src->x, so shift it for display list's x value */
offset = (xy & 0xffff) >> (16 - PHASE_BITS);
offset += -(1 << PHASE_BITS >> 1);

/* This is a kludge to make sure the scaling factors are consistent with YUV's luma scaling.
We lose 1 bit of precision because of this. */
scale &= ~1;
}

/* There may also be a small error introduced by the precision of scale.
Add half of that as a compromise */
offset2 = src - dst * scale;
offset2 >>= 16 - PHASE_BITS;
phase = offset + (offset2 >> 1);

/* Ensure +ve values don't touch the sign bit, then truncate negative values */
if (phase >= 1 << PHASE_BITS)
phase = (1 << PHASE_BITS) - 1;

phase &= SCALER_PPF_IPHASE_MASK;

vc4_dlist_write(vc4_state,
SCALER_PPF_AGC |
VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
VC4_SET_FIELD(phase, SCALER_PPF_IPHASE));
}

static u32 vc4_lbm_size(struct drm_plane_state *state)
Expand All @@ -494,7 +524,7 @@ static u32 vc4_lbm_size(struct drm_plane_state *state)
if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
pix_per_line = vc4_state->crtc_w;
else
pix_per_line = vc4_state->src_w[0];
pix_per_line = vc4_state->src_w[0] >> 16;

if (!vc4_state->is_yuv) {
if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
Expand Down Expand Up @@ -528,13 +558,15 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state,
/* Ch0 H-PPF Word 0: Scaling Parameters */
if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
vc4_write_ppf(vc4_state,
vc4_state->src_w[channel], vc4_state->crtc_w);
vc4_state->src_w[channel], vc4_state->crtc_w, vc4_state->src_x, channel,
state->chroma_siting_h);
}

/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
vc4_write_ppf(vc4_state,
vc4_state->src_h[channel], vc4_state->crtc_h);
vc4_state->src_h[channel], vc4_state->crtc_h, vc4_state->src_y, channel,
state->chroma_siting_v);
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
}

Expand Down Expand Up @@ -585,7 +617,8 @@ static void vc4_plane_calc_load(struct drm_plane_state *state)
for (i = 0; i < fb->format->num_planes; i++) {
/* Even if the bandwidth/plane required for a single frame is
*
* vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
* (vc4_state->src_w[i] >> 16) * (vc4_state->src_h[i] >> 16) *
* cpp * vrefresh
*
* when downscaling, we have to read more pixels per line in
* the time frame reserved for a single line, so the bandwidth
Expand All @@ -594,11 +627,11 @@ static void vc4_plane_calc_load(struct drm_plane_state *state)
* load by this number. We're likely over-estimating the read
* demand, but that's better than under-estimating it.
*/
vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i] >> 16,
vc4_state->crtc_h);
vc4_state->membus_load += vc4_state->src_w[i] *
vc4_state->src_h[i] * vscale_factor *
fb->format->cpp[i];
vc4_state->membus_load += (vc4_state->src_w[i] >> 16) *
(vc4_state->src_h[i] >> 16) *
vscale_factor * fb->format->cpp[i];
vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
}

Expand Down Expand Up @@ -751,7 +784,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
bool mix_plane_alpha;
bool covers_screen;
u32 scl0, scl1, pitch0;
u32 tiling, src_y;
u32 tiling, src_x, src_y;
u32 width, height;
u32 hvs_format = format->hvs;
unsigned int rotation;
int ret, i;
Expand All @@ -763,6 +797,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
if (ret)
return ret;

width = vc4_state->src_w[0] >> 16;
height = vc4_state->src_h[0] >> 16;

/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
* and 4:4:4, scl1 should be set to scl0 so both channels of
* the scaler do the same thing. For YUV, the Y plane needs
Expand All @@ -783,9 +820,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
DRM_MODE_REFLECT_Y);

/* We must point to the last line when Y reflection is enabled. */
src_y = vc4_state->src_y;
src_y = vc4_state->src_y >> 16;
if (rotation & DRM_MODE_REFLECT_Y)
src_y += vc4_state->src_h[0] - 1;
src_y += height - 1;

src_x = vc4_state->src_x >> 16;

switch (base_format_mod) {
case DRM_FORMAT_MOD_LINEAR:
Expand All @@ -800,7 +839,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
(i ? v_subsample : 1) *
fb->pitches[i];

vc4_state->offsets[i] += vc4_state->src_x /
vc4_state->offsets[i] += src_x /
(i ? h_subsample : 1) *
fb->format->cpp[i];
}
Expand All @@ -823,7 +862,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
* pitch * tile_h == tile_size * tiles_per_row
*/
u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
u32 tiles_l = vc4_state->src_x >> tile_w_shift;
u32 tiles_l = src_x >> tile_w_shift;
u32 tiles_r = tiles_w - tiles_l;
u32 tiles_t = src_y >> tile_h_shift;
/* Intra-tile offsets, which modify the base address (the
Expand All @@ -833,7 +872,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
u32 tile_y = (src_y >> 4) & 1;
u32 subtile_y = (src_y >> 2) & 3;
u32 utile_y = src_y & 3;
u32 x_off = vc4_state->src_x & tile_w_mask;
u32 x_off = src_x & tile_w_mask;
u32 y_off = src_y & tile_h_mask;

/* When Y reflection is requested we must set the
Expand Down Expand Up @@ -929,7 +968,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
* of the 12-pixels in that 128-bit word is the
* first pixel to be used
*/
u32 remaining_pixels = vc4_state->src_x % 96;
u32 remaining_pixels = src_x % 96;
u32 aligned = remaining_pixels / 12;
u32 last_bits = remaining_pixels % 12;

Expand All @@ -951,12 +990,12 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
return -EINVAL;
}
pix_per_tile = tile_w / fb->format->cpp[0];
x_off = (vc4_state->src_x % pix_per_tile) /
x_off = (src_x % pix_per_tile) /
(i ? h_subsample : 1) *
fb->format->cpp[i];
}

tile = vc4_state->src_x / pix_per_tile;
tile = src_x / pix_per_tile;

vc4_state->offsets[i] += param * tile_w * tile;
vc4_state->offsets[i] += src_y /
Expand All @@ -975,6 +1014,24 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
return -EINVAL;
}

/* fetch an extra pixel if we don't actually line up with the left edge. */
if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16))
width++;

/* same for the right side */
if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) &&
vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16))
width++;

/* now for the top */
if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16))
height++;

/* and the bottom */
if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) &&
vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16))
height++;

/* Don't waste cycles mixing with plane alpha if the set alpha
* is opaque or there is no per-pixel alpha information.
* In any case we use the alpha property value as the fixed alpha.
Expand Down Expand Up @@ -1017,10 +1074,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
vc4_dlist_write(vc4_state,
(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
vc4_hvs4_get_alpha_blend_mode(state) |
VC4_SET_FIELD(vc4_state->src_w[0],
SCALER_POS2_WIDTH) |
VC4_SET_FIELD(vc4_state->src_h[0],
SCALER_POS2_HEIGHT));
VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));

/* Position Word 3: Context. Written by the HVS. */
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
Expand Down Expand Up @@ -1078,10 +1133,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
/* Position Word 2: Source Image Size */
vc4_state->pos2_offset = vc4_state->dlist_count;
vc4_dlist_write(vc4_state,
VC4_SET_FIELD(vc4_state->src_w[0],
SCALER5_POS2_WIDTH) |
VC4_SET_FIELD(vc4_state->src_h[0],
SCALER5_POS2_HEIGHT));
VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));

/* Position Word 3: Context. Written by the HVS. */
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
Expand Down Expand Up @@ -1572,6 +1625,8 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
DRM_COLOR_YCBCR_BT709,
DRM_COLOR_YCBCR_LIMITED_RANGE);

drm_plane_create_chroma_siting_properties(plane, 0, 0);

if (type == DRM_PLANE_TYPE_PRIMARY)
drm_plane_create_zpos_immutable_property(plane, 0);

Expand Down
16 changes: 1 addition & 15 deletions drivers/usb/host/xhci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1395,12 +1395,9 @@ static void xhci_unmap_temp_buf(struct usb_hcd *hcd, struct urb *urb)
static int xhci_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
gfp_t mem_flags)
{
unsigned int i, maxpacket;
struct scatterlist *sg;
struct xhci_hcd *xhci;

xhci = hcd_to_xhci(hcd);
maxpacket = usb_endpoint_maxp(&urb->ep->desc);

if (xhci_urb_suitable_for_idt(urb))
return 0;
Expand All @@ -1409,16 +1406,6 @@ static int xhci_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
if (xhci_urb_temp_buffer_required(hcd, urb))
return xhci_map_temp_buffer(hcd, urb);
}

if (xhci->quirks & XHCI_VLI_SS_BULK_OUT_BUG &&
usb_endpoint_is_bulk_out(&urb->ep->desc) &&
urb->dev->speed >= USB_SPEED_SUPER &&
urb->transfer_buffer_length != 0) {
for_each_sg(urb->sg, sg, urb->num_sgs, i) {
if (sg->length % maxpacket)
return xhci_map_temp_buffer(hcd, urb);
}
}
return usb_hcd_map_urb_for_dma(hcd, urb, mem_flags);
}

Expand All @@ -1432,8 +1419,7 @@ static void xhci_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
if (urb->num_sgs && (urb->transfer_flags & URB_DMA_MAP_SINGLE))
unmap_temp_buf = true;

if ((xhci->quirks & (XHCI_SG_TRB_CACHE_SIZE_QUIRK | XHCI_VLI_SS_BULK_OUT_BUG))
&& unmap_temp_buf)
if ((xhci->quirks & XHCI_SG_TRB_CACHE_SIZE_QUIRK) && unmap_temp_buf)
xhci_unmap_temp_buf(hcd, urb);
else
usb_hcd_unmap_urb_for_dma(hcd, urb);
Expand Down

0 comments on commit cb3ffc0

Please sign in to comment.