| author | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-11-02 04:53:47 +0000 |
|---|---|---|
| committer | Jonathan Gray <jsg@cvs.openbsd.org> | 2023-11-02 04:53:47 +0000 |
| commit | b44518130b33cadb5c1d619e9e936ae0e0dbf7cb (patch) | |
| tree | 6069eb03c39fbc79808a7d94f857118cce75cbe3 /lib/mesa/src/freedreno | |
| parent | 32aeb3c41fedbbd7b11aacfec48e8f699d16bff0 (diff) | |
Merge Mesa 23.1.9
Diffstat (limited to 'lib/mesa/src/freedreno')
43 files changed, 44 insertions, 23088 deletions
diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-crashes.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-crashes.txt deleted file mode 100644 index 14fbfd657..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-crashes.txt +++ /dev/null @@ -1,4 +0,0 @@ -gl_blur2rectsnonninepatch -gl_bug339297_as_clip -gl_bug6083 -gl_dashtextcaps diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-fails.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-fails.txt deleted file mode 100644 index 2f52c09c6..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-fails.txt +++ /dev/null @@ -1,1379 +0,0 @@ -# Acceptable error - -# Two pixels with wrong color -ovals_as_paths,24 - -# Bad rendering - -arcs_as_paths,-1 -etc1,-1 - -# Model missing - -BlurDrawImage,-1 -CubicStroke,-1 -OverStroke,-1 -aaclip,-1 -aarectmodes,-1 -aaxfermodes,-1 -addarc,-1 -addarc_meas,-1 -analytic_antialias_convex,-1 -analytic_antialias_general,-1 -analytic_gradients,-1 -animated-image-blurs,-1 -anisotropic_hq,-1 -arccirclegap,-1 -arcofzorro,-1 -arcto,-1 -b_119394958,-1 -badpaint,-1 -bezier_conic_effects,-1 -bezier_quad_effects,-1 -beziers,-1 -big_rrect_circle_aa_effect,-1 -big_rrect_circular_corner_aa_effect,-1 -big_rrect_ellipse_aa_effect,-1 -big_rrect_elliptical_corner_aa_effect,-1 -big_rrect_rect_aa_effect,-1 -bigblurs,-1 -bigconic,-1 -bigmatrix,-1 -bigrect,-1 -bigtext,-1 -bigtileimagefilter,-1 -bitmap_subset_shader,-1 -bitmapfilters,-1 -bitmaprect_i,-1 -bitmaprect_s,-1 -bitmaptiled_fractional_horizontal,-1 -bitmaptiled_fractional_vertical,-1 -bleed,-1 -bleed_alpha_bmp,-1 -bleed_alpha_bmp_shader,-1 -bleed_alpha_image,-1 -bleed_alpha_image_shader,-1 -bleed_image,-1 -blur2rects,-1 -blur_ignore_xform_circle,-1 -blur_ignore_xform_rect,-1 -blur_ignore_xform_rrect,-1 -blur_image,-1 -blurcircles,-1 -blurimagevmask,-1 -blurquickreject,-1 -blurrects,-1 -blurredclippedcircle,-1 -blurroundrect-WH-100x100-unevenCorners,-1 -blurs,-1 -bmp_filter_quality_repeat,-1 -bug5099,-1 -bug5252,-1 -bug530095,-1 -bug615686,-1 -bug6987,-1 -c_gms,-1 -chrome_gradtext2,-1 -circle_sizes,-1 -circles,-1 -circular-clips,-1 -circular_arcs_fill,-1 -circular_arcs_hairline,-1 -circular_arcs_stroke_and_fill_butt,-1 -circular_arcs_stroke_and_fill_round,-1 -circular_arcs_stroke_and_fill_square,-1 -circular_arcs_stroke_butt,-1 -circular_arcs_stroke_round,-1 -circular_arcs_stroke_square,-1 -circular_arcs_weird,-1 -clamped_gradients,-1 -clamped_gradients_nodither,-1 -clip_strokerect,-1 -clipcubic,-1 -clippedcubic2,-1 -clockwise,-1 -colorcomposefilter_wacky,-1 -coloremoji,-1 -combo-patheffects,-1 -complexclip2_path_aa,-1 -complexclip2_rect_aa,-1 -complexclip2_rrect_aa,-1 -complexclip2_rrect_bw,-1 -complexclip3_complex,-1 -complexclip3_simple,-1 -complexclip4_aa,-1 -complexclip_aa,-1 -complexclip_aa_invert,-1 -complexclip_aa_layer,-1 -complexclip_aa_layer_invert,-1 -complexclip_bw,-1 -complexclip_bw_invert,-1 -complexclip_bw_layer,-1 -complexclip_bw_layer_invert,-1 -composeshader_bitmap2,-1 -concavepaths,-1 -conicpaths,-1 -const_color_processor,-1 -contour_start,-1 -convex-lineonly-paths,-1 -convex-lineonly-paths-stroke-and-fill,-1 -convex-polygon-inset,-1 -convex_poly_clip,-1 -convex_poly_effect,-1 -convexpaths,-1 -crbug_640176,-1 -crbug_788500,-1 -crbug_847759,-1 -crbug_884166,-1 -crbug_887103,-1 -crbug_888453,-1 -crbug_892988,-1 -cross_context_image,-1 -cubicclosepath,-1 -cubicpath,-1 -daa,-1 -dash_line_zero_off_interval,-1 -dashcircle,-1 
-dashcircle2,-1 -dashcubics,-1 -dashing,-1 -dashing2,-1 -dashing3,-1 -dashing4,-1 -dashing5_aa,-1 -dashing5_bw,-1 -degeneratesegments,-1 -dftext,-1 -dftext_blob_persp,-1 -displacement,-1 -dont_clip_to_layer,-1 -downsamplebitmap_checkerboard_high,-1 -downsamplebitmap_checkerboard_low,-1 -downsamplebitmap_checkerboard_medium,-1 -downsamplebitmap_checkerboard_none,-1 -downsamplebitmap_image_high,-1 -downsamplebitmap_image_low,-1 -downsamplebitmap_image_medium,-1 -downsamplebitmap_image_none,-1 -downsamplebitmap_text_high,-1 -downsamplebitmap_text_low,-1 -downsamplebitmap_text_medium,-1 -downsamplebitmap_text_none,-1 -draw-atlas,-1 -drawTextRSXform,-1 -draw_image_set,-1 -draw_image_set_rect_to_rect,-1 -draw_quad_set,-1 -drawable,-1 -drawbitmaprect-imagerect-subset,-1 -drawbitmaprect-subset,-1 -drawlooper,-1 -drawminibitmaprect,-1 -drawminibitmaprect_aa,-1 -drawregionmodes,-1 -dropshadowimagefilter,-1 -drrect,-1 -drrect_small_inner,-1 -dstreadshuffle,-1 -emboss,-1 -emptypath,-1 -extractbitmap,-1 -fancy_gradients,-1 -fancyblobunderline,-1 -fatpathfill,-1 -fillcircle,-1 -filltypes,-1 -filltypespersp,-1 -filterbitmap_checkerboard_192_192,-1 -filterbitmap_checkerboard_32_2,-1 -filterbitmap_checkerboard_32_32,-1 -filterbitmap_checkerboard_32_32_g8,-1 -filterbitmap_checkerboard_32_8,-1 -filterbitmap_checkerboard_4_4,-1 -filterbitmap_image_color_wheel.png,-1 -filterbitmap_image_mandrill_128.png,-1 -filterbitmap_image_mandrill_16.png,-1 -filterbitmap_image_mandrill_256.png,-1 -filterbitmap_image_mandrill_32.png,-1 -filterbitmap_image_mandrill_512.png,-1 -filterbitmap_image_mandrill_64.png,-1 -filterbitmap_image_mandrill_64.png_g8,-1 -filterbitmap_text_10.00pt,-1 -filterbitmap_text_3.00pt,-1 -filterbitmap_text_7.00pt,-1 -filterbug,-1 -filterfastbounds,-1 -filterindiabox,-1 -flippity,-1 -fontcache,-1 -fontcache-mt,-1 -fontmgr_bounds,-1 -fontmgr_bounds_0.75_0,-1 -fontmgr_bounds_1_-0.25,-1 -fontmgr_iter,-1 -fontmgr_match,-1 -fontregen,-1 -fontscaler,-1 -fontscalerdistortable,-1 -fwidth_squircle,-1 -gamma,-1 -getpostextpath,-1 -giantbitmap_clamp_bilerp_rotate,-1 -giantbitmap_clamp_bilerp_scale,-1 -giantbitmap_mirror_bilerp_rotate,-1 -giantbitmap_mirror_bilerp_scale,-1 -giantbitmap_repeat_bilerp_rotate,-1 -giantbitmap_repeat_bilerp_scale,-1 -glyph_pos_h_b,-1 -glyph_pos_h_f,-1 -glyph_pos_h_s,-1 -glyph_pos_n_b,-1 -glyph_pos_n_f,-1 -glyph_pos_n_s,-1 -gradient_dirty_laundry,-1 -gradients,-1 -gradients4f,-1 -gradients4f_nodither,-1 -gradients_2pt_conical_edge,-1 -gradients_2pt_conical_edge_mirror,-1 -gradients_2pt_conical_edge_nodither,-1 -gradients_2pt_conical_edge_repeat,-1 -gradients_2pt_conical_inside,-1 -gradients_2pt_conical_inside_mirror,-1 -gradients_2pt_conical_inside_nodither,-1 -gradients_2pt_conical_inside_repeat,-1 -gradients_2pt_conical_outside,-1 -gradients_2pt_conical_outside_mirror,-1 -gradients_2pt_conical_outside_nodither,-1 -gradients_2pt_conical_outside_repeat,-1 -gradients_degenerate_2pt,-1 -gradients_degenerate_2pt_nodither,-1 -gradients_dup_color_stops,-1 -gradients_local_perspective,-1 -gradients_local_perspective_nodither,-1 -gradients_no_texture,-1 -gradients_no_texture_nodither,-1 -gradients_nodither,-1 -gradients_view_perspective,-1 -gradients_view_perspective_nodither,-1 -hairlines,-1 -hairmodes,-1 -hittestpath,-1 -hsl,-1 -hugebitmapshader,-1 -image-cacherator-from-picture,-1 -image-cacherator-from-raster,-1 -image-cacherator-from-texture,-1 -image-picture,-1 -image-shader,-1 -image_from_yuv_textures,-1 -image_scale_aligned,-1 -imagealphathreshold_image,-1 -imageblur,-1 
-imagefilters_xfermodes,-1 -imagefiltersbase,-1 -imagefiltersclipped,-1 -imagefilterscropexpand,-1 -imagefilterscropped,-1 -imagefiltersscaled,-1 -imagefiltersstroked,-1 -imagefilterstransformed,-1 -imagemagnifier,-1 -imagemagnifier_cropped,-1 -imagemakewithfilter,-1 -imageresizetiled,-1 -imagesource,-1 -imagesrc2_low,-1 -innershapes,-1 -innershapes_bw,-1 -internal_links,-1 -inverse_paths,-1 -largecircle,-1 -lattice,-1 -lcdoverlap,-1 -lcdtext,-1 -lighting,-1 -lightingshader2,-1 -lineclosepath,-1 -linepath,-1 -localmatrixshader_nested,-1 -longlinedash,-1 -longpathdash,-1 -longwavyline,-1 -lumafilter,-1 -maddash,-1 -mandoline,-1 -manyarcs,-1 -manycircles,-1 -manyrrects,-1 -matrixconvolution,-1 -matrixconvolution_color,-1 -matriximagefilter,-1 -mipmap,-1 -mixedtextblobs,-1 -morphology,-1 -nested_aa,-1 -nested_flipY_aa,-1 -nested_flipY_bw,-1 -new_texture_image,-1 -ninepatch-stretch,-1 -nonclosedpaths,-1 -ovals,-1 -p3_ovals,-1 -parsedpaths,-1 -patch_image,-1 -path-reverse,-1 -path_huge_crbug_800804,-1 -path_mask_cache,-1 -patheffect,-1 -pathfill,-1 -pathinterior,-1 -pathinvfill,-1 -pathopsinverse,-1 -pathopsskpclip,-1 -pdf_never_embed,-1 -persp_images,-1 -persp_shaders_aa,-1 -persp_shaders_bw,-1 -pictureimagefilter,-1 -pictureshader,-1 -pictureshader_localwrapper,-1 -pixel_snap_combo,-1 -pixel_snap_line,-1 -pixel_snap_point,-1 -pixel_snap_rect,-1 -points,-1 -poly2poly,-1 -polygons,-1 -quadcap,-1 -quadclosepath,-1 -quadpath,-1 -radial_gradient4,-1 -radial_gradient4_nodither,-1 -rectangle_texture,-1 -rects,-1 -rects_as_paths,-1 -repeated_bitmap_jpg,-1 -resizeimagefilter,-1 -rotate_imagefilter,-1 -rotatedcubicpath,-1 -roundrects,-1 -rrect,-1 -rrect_clip_aa,-1 -rrect_clip_draw_paint,-1 -rrect_draw_aa,-1 -rrect_effect,-1 -save_behind,-1 -savelayer_clipmask,-1 -savelayer_coverage,-1 -savelayer_initfromprev,-1 -savelayer_maskfilter,-1 -savelayer_with_backdrop,-1 -scaled_tilemodes_npot,-1 -scaledemoji_rendering,-1 -scaledstrokes,-1 -shadermaskfilter_gradient,-1 -shadermaskfilter_image,-1 -shadermaskfilter_localmatrix,-1 -shadertext3,-1 -shadow_utils,-1 -shadow_utils_gray,-1 -shadow_utils_occl,-1 -shadows,-1 -sharedcorners,-1 -simple-magnification,-1 -simple-offsetimagefilter,-1 -simple-polygon-offset,-1 -simpleaaclip_path,-1 -simpleaaclip_rect,-1 -simpleblurroundrect,-1 -simplerect,-1 -simpleshapes,-1 -skbug_257,-1 -skbug_4868,-1 -skbug_8664,-1 -skinning,-1 -skinning_cached,-1 -skinning_cpu,-1 -skinning_cpu_cached,-1 -smallarc,-1 -smallpaths,-1 -squarehair,-1 -stlouisarch,-1 -stringart,-1 -stroke-fill,-1 -stroke_rect_shader,-1 -strokecircle,-1 -strokedlines,-1 -strokerect,-1 -strokerects,-1 -strokes3,-1 -strokes_poly,-1 -strokes_round,-1 -stroketext,-1 -sweep_tiling,-1 -tall_stretched_bitmaps,-1 -teenyStrokes,-1 -testgradient,-1 -text_scale_skew,-1 -textblob,-1 -textblobgeometrychange,-1 -textbloblooper,-1 -textblobmixedsizes,-1 -textblobmixedsizes_df,-1 -textblobrandomfont,-1 -textblobshader,-1 -textblobtransforms,-1 -textblobuseaftergpufree,-1 -texture_domain_effect,-1 -texture_domain_effect_bilerp,-1 -texture_domain_effect_mipmap,-1 -thinconcavepaths,-1 -thinrects,-1 -thinstrokedrects,-1 -tiledscaledbitmap,-1 -tileimagefilter,-1 -tilemode_decal,-1 -tilemode_gradient,-1 -tilemodes,-1 -tilemodes_npot,-1 -tinyanglearcs,-1 -trickycubicstrokes,-1 -trimpatheffect,-1 -typefacerendering,-1 -typefacerendering_pfa,-1 -typefacerendering_pfb,-1 -typefacestyles_kerning,-1 -varied_text_clipped_lcd,-1 -varied_text_clipped_no_lcd,-1 -varied_text_ignorable_clip_lcd,-1 -varied_text_ignorable_clip_no_lcd,-1 
-vertices,-1 -vertices_batching,-1 -vertices_scaled_shader,-1 -verylarge_picture_image,-1 -verylargebitmap,-1 -windowrectangles,-1 -windowrectangles_mask,-1 -xfermodeimagefilter,-1 -xfermodes,-1 -yuv_nv12_to_rgb_effect,-1 -yuv_to_rgb_effect,-1 -zeroPath,-1 -zero_control_stroke,-1 -zero_length_paths_bw,-1 -zero_length_paths_dbl_aa,-1 -zero_length_paths_dbl_bw,-1 -zerolinestroke,-1 -windowrectangles_mask,-1 -s_BlurDrawImage,-1 -s_CubicStroke,-1 -s_OverStroke,-1 -s_aaclip,-1 -s_aarectmodes,-1 -s_aaxfermodes,-1 -s_addarc,-1 -s_addarc_meas,-1 -s_analytic_antialias_convex,-1 -s_analytic_antialias_general,-1 -s_analytic_gradients,-1 -s_animated-image-blurs,-1 -s_anisotropic_hq,-1 -s_arccirclegap,-1 -s_arcofzorro,-1 -s_arcto,-1 -s_b_119394958,-1 -s_badpaint,-1 -s_bezier_conic_effects,-1 -s_bezier_quad_effects,-1 -s_beziers,-1 -s_big_rrect_circle_aa_effect,-1 -s_big_rrect_circular_corner_aa_effect,-1 -s_big_rrect_ellipse_aa_effect,-1 -s_big_rrect_elliptical_corner_aa_effect,-1 -s_big_rrect_rect_aa_effect,-1 -s_bigblurs,-1 -s_bigconic,-1 -s_bigmatrix,-1 -s_bigrect,-1 -s_bigtext,-1 -s_bigtileimagefilter,-1 -s_bitmap_subset_shader,-1 -s_bitmapfilters,-1 -s_bitmaprect_i,-1 -s_bitmaprect_s,-1 -s_bitmaptiled_fractional_horizontal,-1 -s_bitmaptiled_fractional_vertical,-1 -s_bleed,-1 -s_bleed_alpha_bmp,-1 -s_bleed_alpha_bmp_shader,-1 -s_bleed_alpha_image,-1 -s_bleed_alpha_image_shader,-1 -s_bleed_image,-1 -s_blur2rects,-1 -s_blur_ignore_xform_circle,-1 -s_blur_ignore_xform_rect,-1 -s_blur_ignore_xform_rrect,-1 -s_blur_image,-1 -s_blurcircles,-1 -s_blurimagevmask,-1 -s_blurquickreject,-1 -s_blurrects,-1 -s_blurredclippedcircle,-1 -s_blurroundrect-WH-100x100-unevenCorners,-1 -s_blurs,-1 -s_bmp_filter_quality_repeat,-1 -s_bug5099,-1 -s_bug5252,-1 -s_bug530095,-1 -s_bug615686,-1 -s_bug6987,-1 -s_c_gms,-1 -s_chrome_gradtext2,-1 -s_circle_sizes,-1 -s_circles,-1 -s_circular-clips,-1 -s_circular_arcs_fill,-1 -s_circular_arcs_hairline,-1 -s_circular_arcs_stroke_and_fill_butt,-1 -s_circular_arcs_stroke_and_fill_round,-1 -s_circular_arcs_stroke_and_fill_square,-1 -s_circular_arcs_stroke_butt,-1 -s_circular_arcs_stroke_round,-1 -s_circular_arcs_stroke_square,-1 -s_circular_arcs_weird,-1 -s_clamped_gradients,-1 -s_clamped_gradients_nodither,-1 -s_clip_strokerect,-1 -s_clipcubic,-1 -s_clippedcubic2,-1 -s_clockwise,-1 -s_colorcomposefilter_wacky,-1 -s_coloremoji,-1 -s_combo-patheffects,-1 -s_complexclip2_path_aa,-1 -s_complexclip2_rect_aa,-1 -s_complexclip2_rrect_aa,-1 -s_complexclip2_rrect_bw,-1 -s_complexclip3_complex,-1 -s_complexclip3_simple,-1 -s_complexclip4_aa,-1 -s_complexclip_aa,-1 -s_complexclip_aa_invert,-1 -s_complexclip_aa_layer,-1 -s_complexclip_aa_layer_invert,-1 -s_complexclip_bw,-1 -s_complexclip_bw_invert,-1 -s_complexclip_bw_layer,-1 -s_complexclip_bw_layer_invert,-1 -s_composeshader_bitmap2,-1 -s_concavepaths,-1 -s_conicpaths,-1 -s_const_color_processor,-1 -s_contour_start,-1 -s_convex-lineonly-paths,-1 -s_convex-lineonly-paths-stroke-and-fill,-1 -s_convex-polygon-inset,-1 -s_convex_poly_clip,-1 -s_convex_poly_effect,-1 -s_convexpaths,-1 -s_crbug_640176,-1 -s_crbug_788500,-1 -s_crbug_847759,-1 -s_crbug_884166,-1 -s_crbug_887103,-1 -s_crbug_888453,-1 -s_crbug_892988,-1 -s_cross_context_image,-1 -s_cubicclosepath,-1 -s_cubicpath,-1 -s_daa,-1 -s_dash_line_zero_off_interval,-1 -s_dashcircle,-1 -s_dashcircle2,-1 -s_dashcubics,-1 -s_dashing,-1 -s_dashing2,-1 -s_dashing3,-1 -s_dashing4,-1 -s_dashing5_aa,-1 -s_dashing5_bw,-1 -s_degeneratesegments,-1 -s_dftext,-1 -s_dftext_blob_persp,-1 -s_displacement,-1 
-s_dont_clip_to_layer,-1 -s_downsamplebitmap_checkerboard_high,-1 -s_downsamplebitmap_checkerboard_low,-1 -s_downsamplebitmap_checkerboard_medium,-1 -s_downsamplebitmap_checkerboard_none,-1 -s_downsamplebitmap_image_high,-1 -s_downsamplebitmap_image_low,-1 -s_downsamplebitmap_image_medium,-1 -s_downsamplebitmap_image_none,-1 -s_downsamplebitmap_text_high,-1 -s_downsamplebitmap_text_low,-1 -s_downsamplebitmap_text_medium,-1 -s_downsamplebitmap_text_none,-1 -s_draw-atlas,-1 -s_drawTextRSXform,-1 -s_draw_image_set,-1 -s_draw_image_set_rect_to_rect,-1 -s_draw_quad_set,-1 -s_drawable,-1 -s_drawbitmaprect-imagerect-subset,-1 -s_drawbitmaprect-subset,-1 -s_drawlooper,-1 -s_drawminibitmaprect,-1 -s_drawminibitmaprect_aa,-1 -s_drawregionmodes,-1 -s_dropshadowimagefilter,-1 -s_drrect,-1 -s_drrect_small_inner,-1 -s_dstreadshuffle,-1 -s_emboss,-1 -s_emptypath,-1 -s_extractbitmap,-1 -s_fancy_gradients,-1 -s_fancyblobunderline,-1 -s_fatpathfill,-1 -s_fillcircle,-1 -s_filltypes,-1 -s_filltypespersp,-1 -s_filterbitmap_checkerboard_192_192,-1 -s_filterbitmap_checkerboard_32_2,-1 -s_filterbitmap_checkerboard_32_32,-1 -s_filterbitmap_checkerboard_32_32_g8,-1 -s_filterbitmap_checkerboard_32_8,-1 -s_filterbitmap_checkerboard_4_4,-1 -s_filterbitmap_image_color_wheel.png,-1 -s_filterbitmap_image_mandrill_128.png,-1 -s_filterbitmap_image_mandrill_16.png,-1 -s_filterbitmap_image_mandrill_256.png,-1 -s_filterbitmap_image_mandrill_32.png,-1 -s_filterbitmap_image_mandrill_512.png,-1 -s_filterbitmap_image_mandrill_64.png,-1 -s_filterbitmap_image_mandrill_64.png_g8,-1 -s_filterbitmap_text_10.00pt,-1 -s_filterbitmap_text_3.00pt,-1 -s_filterbitmap_text_7.00pt,-1 -s_filterbug,-1 -s_filterfastbounds,-1 -s_filterindiabox,-1 -s_flippity,-1 -s_fontcache,-1 -s_fontcache-mt,-1 -s_fontmgr_bounds,-1 -s_fontmgr_bounds_0.75_0,-1 -s_fontmgr_bounds_1_-0.25,-1 -s_fontmgr_iter,-1 -s_fontmgr_match,-1 -s_fontregen,-1 -s_fontscaler,-1 -s_fontscalerdistortable,-1 -s_fwidth_squircle,-1 -s_gamma,-1 -s_getpostextpath,-1 -s_giantbitmap_clamp_bilerp_rotate,-1 -s_giantbitmap_clamp_bilerp_scale,-1 -s_giantbitmap_mirror_bilerp_rotate,-1 -s_giantbitmap_mirror_bilerp_scale,-1 -s_giantbitmap_repeat_bilerp_rotate,-1 -s_giantbitmap_repeat_bilerp_scale,-1 -s_glyph_pos_h_b,-1 -s_glyph_pos_h_f,-1 -s_glyph_pos_h_s,-1 -s_glyph_pos_n_b,-1 -s_glyph_pos_n_f,-1 -s_glyph_pos_n_s,-1 -s_gradient_dirty_laundry,-1 -s_gradients,-1 -s_gradients4f,-1 -s_gradients4f_nodither,-1 -s_gradients_2pt_conical_edge,-1 -s_gradients_2pt_conical_edge_mirror,-1 -s_gradients_2pt_conical_edge_nodither,-1 -s_gradients_2pt_conical_edge_repeat,-1 -s_gradients_2pt_conical_inside,-1 -s_gradients_2pt_conical_inside_mirror,-1 -s_gradients_2pt_conical_inside_nodither,-1 -s_gradients_2pt_conical_inside_repeat,-1 -s_gradients_2pt_conical_outside,-1 -s_gradients_2pt_conical_outside_mirror,-1 -s_gradients_2pt_conical_outside_nodither,-1 -s_gradients_2pt_conical_outside_repeat,-1 -s_gradients_degenerate_2pt,-1 -s_gradients_degenerate_2pt_nodither,-1 -s_gradients_dup_color_stops,-1 -s_gradients_local_perspective,-1 -s_gradients_local_perspective_nodither,-1 -s_gradients_no_texture,-1 -s_gradients_no_texture_nodither,-1 -s_gradients_nodither,-1 -s_gradients_view_perspective,-1 -s_gradients_view_perspective_nodither,-1 -s_hairlines,-1 -s_hairmodes,-1 -s_hittestpath,-1 -s_hsl,-1 -s_hugebitmapshader,-1 -s_image-cacherator-from-picture,-1 -s_image-cacherator-from-raster,-1 -s_image-cacherator-from-texture,-1 -s_image-picture,-1 -s_image-shader,-1 -s_image_from_yuv_textures,-1 -s_image_scale_aligned,-1 
-s_imagealphathreshold_image,-1 -s_imageblur,-1 -s_imagefilters_xfermodes,-1 -s_imagefiltersbase,-1 -s_imagefiltersclipped,-1 -s_imagefilterscropexpand,-1 -s_imagefilterscropped,-1 -s_imagefiltersscaled,-1 -s_imagefiltersstroked,-1 -s_imagefilterstransformed,-1 -s_imagemagnifier,-1 -s_imagemagnifier_cropped,-1 -s_imagemakewithfilter,-1 -s_imageresizetiled,-1 -s_imagesource,-1 -s_imagesrc2_low,-1 -s_innershapes,-1 -s_innershapes_bw,-1 -s_internal_links,-1 -s_inverse_paths,-1 -s_largecircle,-1 -s_lattice,-1 -s_lcdoverlap,-1 -s_lcdtext,-1 -s_lighting,-1 -s_lightingshader2,-1 -s_lineclosepath,-1 -s_linepath,-1 -s_localmatrixshader_nested,-1 -s_longlinedash,-1 -s_longpathdash,-1 -s_longwavyline,-1 -s_lumafilter,-1 -s_maddash,-1 -s_mandoline,-1 -s_manyarcs,-1 -s_manycircles,-1 -s_manyrrects,-1 -s_matrixconvolution,-1 -s_matrixconvolution_color,-1 -s_matriximagefilter,-1 -s_mipmap,-1 -s_mixedtextblobs,-1 -s_morphology,-1 -s_nested_aa,-1 -s_nested_flipY_aa,-1 -s_nested_flipY_bw,-1 -s_new_texture_image,-1 -s_ninepatch-stretch,-1 -s_nonclosedpaths,-1 -s_ovals,-1 -s_p3_ovals,-1 -s_parsedpaths,-1 -s_patch_image,-1 -s_path-reverse,-1 -s_path_huge_crbug_800804,-1 -s_path_mask_cache,-1 -s_patheffect,-1 -s_pathfill,-1 -s_pathinterior,-1 -s_pathinvfill,-1 -s_pathopsinverse,-1 -s_pathopsskpclip,-1 -s_pdf_never_embed,-1 -s_persp_images,-1 -s_persp_shaders_aa,-1 -s_persp_shaders_bw,-1 -s_pictureimagefilter,-1 -s_pictureshader,-1 -s_pictureshader_localwrapper,-1 -s_pixel_snap_combo,-1 -s_pixel_snap_line,-1 -s_pixel_snap_point,-1 -s_pixel_snap_rect,-1 -s_points,-1 -s_poly2poly,-1 -s_polygons,-1 -s_quadcap,-1 -s_quadclosepath,-1 -s_quadpath,-1 -s_radial_gradient4,-1 -s_radial_gradient4_nodither,-1 -s_rectangle_texture,-1 -s_rects,-1 -s_rects_as_paths,-1 -s_repeated_bitmap_jpg,-1 -s_resizeimagefilter,-1 -s_rotate_imagefilter,-1 -s_rotatedcubicpath,-1 -s_roundrects,-1 -s_rrect,-1 -s_rrect_clip_aa,-1 -s_rrect_clip_draw_paint,-1 -s_rrect_draw_aa,-1 -s_rrect_effect,-1 -s_save_behind,-1 -s_savelayer_clipmask,-1 -s_savelayer_coverage,-1 -s_savelayer_initfromprev,-1 -s_savelayer_maskfilter,-1 -s_savelayer_with_backdrop,-1 -s_scaled_tilemodes_npot,-1 -s_scaledemoji_rendering,-1 -s_scaledstrokes,-1 -s_shadermaskfilter_gradient,-1 -s_shadermaskfilter_image,-1 -s_shadermaskfilter_localmatrix,-1 -s_shadertext3,-1 -s_shadow_utils,-1 -s_shadow_utils_gray,-1 -s_shadow_utils_occl,-1 -s_shadows,-1 -s_sharedcorners,-1 -s_simple-magnification,-1 -s_simple-offsetimagefilter,-1 -s_simple-polygon-offset,-1 -s_simpleaaclip_path,-1 -s_simpleaaclip_rect,-1 -s_simpleblurroundrect,-1 -s_simplerect,-1 -s_simpleshapes,-1 -s_skbug_257,-1 -s_skbug_4868,-1 -s_skbug_8664,-1 -s_skinning,-1 -s_skinning_cached,-1 -s_skinning_cpu,-1 -s_skinning_cpu_cached,-1 -s_smallarc,-1 -s_smallpaths,-1 -s_squarehair,-1 -s_stlouisarch,-1 -s_stringart,-1 -s_stroke-fill,-1 -s_stroke_rect_shader,-1 -s_strokecircle,-1 -s_strokedlines,-1 -s_strokerect,-1 -s_strokerects,-1 -s_strokes3,-1 -s_strokes_poly,-1 -s_strokes_round,-1 -s_stroketext,-1 -s_sweep_tiling,-1 -s_tall_stretched_bitmaps,-1 -s_teenyStrokes,-1 -s_testgradient,-1 -s_text_scale_skew,-1 -s_textblob,-1 -s_textblobgeometrychange,-1 -s_textbloblooper,-1 -s_textblobmixedsizes,-1 -s_textblobmixedsizes_df,-1 -s_textblobrandomfont,-1 -s_textblobshader,-1 -s_textblobtransforms,-1 -s_textblobuseaftergpufree,-1 -s_texture_domain_effect,-1 -s_texture_domain_effect_bilerp,-1 -s_texture_domain_effect_mipmap,-1 -s_thinconcavepaths,-1 -s_thinrects,-1 -s_thinstrokedrects,-1 -s_tiledscaledbitmap,-1 -s_tileimagefilter,-1 
-s_tilemode_decal,-1 -s_tilemode_gradient,-1 -s_tilemodes,-1 -s_tilemodes_npot,-1 -s_tinyanglearcs,-1 -s_trickycubicstrokes,-1 -s_trimpatheffect,-1 -s_typefacerendering,-1 -s_typefacerendering_pfa,-1 -s_typefacerendering_pfb,-1 -s_typefacestyles_kerning,-1 -s_varied_text_clipped_lcd,-1 -s_varied_text_clipped_no_lcd,-1 -s_varied_text_ignorable_clip_lcd,-1 -s_varied_text_ignorable_clip_no_lcd,-1 -s_vertices,-1 -s_vertices_batching,-1 -s_vertices_scaled_shader,-1 -s_verylarge_picture_image,-1 -s_verylargebitmap,-1 -s_windowrectangles,-1 -s_windowrectangles_mask,-1 -s_xfermodeimagefilter,-1 -s_xfermodes,-1 -s_yuv_nv12_to_rgb_effect,-1 -s_yuv_to_rgb_effect,-1 -s_zeroPath,-1 -s_zero_control_stroke,-1 -s_zero_length_paths_bw,-1 -s_zero_length_paths_dbl_aa,-1 -s_zero_length_paths_dbl_bw,-1 -s_zerolinestroke,-1 -windowrectangles_mask,-1 -BlurDrawImage,-1 -CubicStroke,-1 -OverStroke,-1 -aaclip,-1 -aarectmodes,-1 -aaxfermodes,-1 -addarc,-1 -addarc_meas,-1 -analytic_antialias_convex,-1 -analytic_antialias_general,-1 -analytic_gradients,-1 -animated-image-blurs,-1 -anisotropic_hq,-1 -arccirclegap,-1 -arcofzorro,-1 -arcto,-1 -b_119394958,-1 -badpaint,-1 -bezier_conic_effects,-1 -bezier_quad_effects,-1 -beziers,-1 -big_rrect_circle_aa_effect,-1 -big_rrect_circular_corner_aa_effect,-1 -big_rrect_ellipse_aa_effect,-1 -big_rrect_elliptical_corner_aa_effect,-1 -big_rrect_rect_aa_effect,-1 -bigblurs,-1 -bigconic,-1 -bigmatrix,-1 -bigrect,-1 -bigtext,-1 -bigtileimagefilter,-1 -bitmap_subset_shader,-1 -bitmapfilters,-1 -bitmaprect_i,-1 -bitmaprect_s,-1 -bitmaptiled_fractional_horizontal,-1 -bitmaptiled_fractional_vertical,-1 -bleed,-1 -bleed_alpha_bmp,-1 -bleed_alpha_bmp_shader,-1 -bleed_alpha_image,-1 -bleed_alpha_image_shader,-1 -bleed_image,-1 -blur2rects,-1 -blur_ignore_xform_circle,-1 -blur_ignore_xform_rect,-1 -blur_ignore_xform_rrect,-1 -blur_image,-1 -blurcircles,-1 -blurimagevmask,-1 -blurquickreject,-1 -blurrects,-1 -blurredclippedcircle,-1 -blurroundrect-WH-100x100-unevenCorners,-1 -blurs,-1 -bmp_filter_quality_repeat,-1 -bug5099,-1 -bug5252,-1 -bug530095,-1 -bug615686,-1 -bug6987,-1 -c_gms,-1 -chrome_gradtext2,-1 -circle_sizes,-1 -circles,-1 -circular-clips,-1 -circular_arcs_fill,-1 -circular_arcs_hairline,-1 -circular_arcs_stroke_and_fill_butt,-1 -circular_arcs_stroke_and_fill_round,-1 -circular_arcs_stroke_and_fill_square,-1 -circular_arcs_stroke_butt,-1 -circular_arcs_stroke_round,-1 -circular_arcs_stroke_square,-1 -circular_arcs_weird,-1 -clamped_gradients,-1 -clamped_gradients_nodither,-1 -clip_strokerect,-1 -clipcubic,-1 -clippedcubic2,-1 -clockwise,-1 -colorcomposefilter_wacky,-1 -coloremoji,-1 -combo-patheffects,-1 -complexclip2_path_aa,-1 -complexclip2_rect_aa,-1 -complexclip2_rrect_aa,-1 -complexclip2_rrect_bw,-1 -complexclip3_complex,-1 -complexclip3_simple,-1 -complexclip4_aa,-1 -complexclip_aa,-1 -complexclip_aa_invert,-1 -complexclip_aa_layer,-1 -complexclip_aa_layer_invert,-1 -complexclip_bw,-1 -complexclip_bw_invert,-1 -complexclip_bw_layer,-1 -complexclip_bw_layer_invert,-1 -composeshader_bitmap2,-1 -concavepaths,-1 -conicpaths,-1 -const_color_processor,-1 -contour_start,-1 -convex-lineonly-paths,-1 -convex-lineonly-paths-stroke-and-fill,-1 -convex-polygon-inset,-1 -convex_poly_clip,-1 -convex_poly_effect,-1 -convexpaths,-1 -crbug_640176,-1 -crbug_788500,-1 -crbug_847759,-1 -crbug_884166,-1 -crbug_887103,-1 -crbug_888453,-1 -crbug_892988,-1 -cross_context_image,-1 -cubicclosepath,-1 -cubicpath,-1 -daa,-1 -dash_line_zero_off_interval,-1 -dashcircle,-1 -dashcircle2,-1 -dashcubics,-1 
-dashing,-1 -dashing2,-1 -dashing3,-1 -dashing4,-1 -dashing5_aa,-1 -dashing5_bw,-1 -degeneratesegments,-1 -dftext,-1 -dftext_blob_persp,-1 -displacement,-1 -dont_clip_to_layer,-1 -downsamplebitmap_checkerboard_high,-1 -downsamplebitmap_checkerboard_low,-1 -downsamplebitmap_checkerboard_medium,-1 -downsamplebitmap_checkerboard_none,-1 -downsamplebitmap_image_high,-1 -downsamplebitmap_image_low,-1 -downsamplebitmap_image_medium,-1 -downsamplebitmap_image_none,-1 -downsamplebitmap_text_high,-1 -downsamplebitmap_text_low,-1 -downsamplebitmap_text_medium,-1 -downsamplebitmap_text_none,-1 -draw-atlas,-1 -drawTextRSXform,-1 -draw_image_set,-1 -draw_image_set_rect_to_rect,-1 -draw_quad_set,-1 -drawable,-1 -drawbitmaprect-imagerect-subset,-1 -drawbitmaprect-subset,-1 -drawlooper,-1 -drawminibitmaprect,-1 -drawminibitmaprect_aa,-1 -drawregionmodes,-1 -dropshadowimagefilter,-1 -drrect,-1 -drrect_small_inner,-1 -dstreadshuffle,-1 -emboss,-1 -emptypath,-1 -extractbitmap,-1 -fancy_gradients,-1 -fancyblobunderline,-1 -fatpathfill,-1 -fillcircle,-1 -filltypes,-1 -filltypespersp,-1 -filterbitmap_checkerboard_192_192,-1 -filterbitmap_checkerboard_32_2,-1 -filterbitmap_checkerboard_32_32,-1 -filterbitmap_checkerboard_32_32_g8,-1 -filterbitmap_checkerboard_32_8,-1 -filterbitmap_checkerboard_4_4,-1 -filterbitmap_image_color_wheel.png,-1 -filterbitmap_image_mandrill_128.png,-1 -filterbitmap_image_mandrill_16.png,-1 -filterbitmap_image_mandrill_256.png,-1 -filterbitmap_image_mandrill_32.png,-1 -filterbitmap_image_mandrill_512.png,-1 -filterbitmap_image_mandrill_64.png,-1 -filterbitmap_image_mandrill_64.png_g8,-1 -filterbitmap_text_10.00pt,-1 -filterbitmap_text_3.00pt,-1 -filterbitmap_text_7.00pt,-1 -filterbug,-1 -filterfastbounds,-1 -filterindiabox,-1 -flippity,-1 -fontcache,-1 -fontcache-mt,-1 -fontmgr_bounds,-1 -fontmgr_bounds_0.75_0,-1 -fontmgr_bounds_1_-0.25,-1 -fontmgr_iter,-1 -fontmgr_match,-1 -fontregen,-1 -fontscaler,-1 -fontscalerdistortable,-1 -fwidth_squircle,-1 -gamma,-1 -getpostextpath,-1 -giantbitmap_clamp_bilerp_rotate,-1 -giantbitmap_clamp_bilerp_scale,-1 -giantbitmap_mirror_bilerp_rotate,-1 -giantbitmap_mirror_bilerp_scale,-1 -giantbitmap_repeat_bilerp_rotate,-1 -giantbitmap_repeat_bilerp_scale,-1 -glyph_pos_h_b,-1 -glyph_pos_h_f,-1 -glyph_pos_h_s,-1 -glyph_pos_n_b,-1 -glyph_pos_n_f,-1 -glyph_pos_n_s,-1 -gradient_dirty_laundry,-1 -gradients,-1 -gradients4f,-1 -gradients4f_nodither,-1 -gradients_2pt_conical_edge,-1 -gradients_2pt_conical_edge_mirror,-1 -gradients_2pt_conical_edge_nodither,-1 -gradients_2pt_conical_edge_repeat,-1 -gradients_2pt_conical_inside,-1 -gradients_2pt_conical_inside_mirror,-1 -gradients_2pt_conical_inside_nodither,-1 -gradients_2pt_conical_inside_repeat,-1 -gradients_2pt_conical_outside,-1 -gradients_2pt_conical_outside_mirror,-1 -gradients_2pt_conical_outside_nodither,-1 -gradients_2pt_conical_outside_repeat,-1 -gradients_degenerate_2pt,-1 -gradients_degenerate_2pt_nodither,-1 -gradients_dup_color_stops,-1 -gradients_local_perspective,-1 -gradients_local_perspective_nodither,-1 -gradients_no_texture,-1 -gradients_no_texture_nodither,-1 -gradients_nodither,-1 -gradients_view_perspective,-1 -gradients_view_perspective_nodither,-1 -hairlines,-1 -hairmodes,-1 -hittestpath,-1 -hsl,-1 -hugebitmapshader,-1 -image-cacherator-from-picture,-1 -image-cacherator-from-raster,-1 -image-cacherator-from-texture,-1 -image-picture,-1 -image-shader,-1 -image_from_yuv_textures,-1 -image_scale_aligned,-1 -imagealphathreshold_image,-1 -imageblur,-1 -imagefilters_xfermodes,-1 
-imagefiltersbase,-1 -imagefiltersclipped,-1 -imagefilterscropexpand,-1 -imagefilterscropped,-1 -imagefiltersscaled,-1 -imagefiltersstroked,-1 -imagefilterstransformed,-1 -imagemagnifier,-1 -imagemagnifier_cropped,-1 -imagemakewithfilter,-1 -imageresizetiled,-1 -imagesource,-1 -imagesrc2_low,-1 -innershapes,-1 -innershapes_bw,-1 -internal_links,-1 -inverse_paths,-1 -largecircle,-1 -lattice,-1 -lcdoverlap,-1 -lcdtext,-1 -lighting,-1 -lightingshader2,-1 -lineclosepath,-1 -linepath,-1 -localmatrixshader_nested,-1 -longlinedash,-1 -longpathdash,-1 -longwavyline,-1 -lumafilter,-1 -maddash,-1 -mandoline,-1 -manyarcs,-1 -manycircles,-1 -manyrrects,-1 -matrixconvolution,-1 -matrixconvolution_color,-1 -matriximagefilter,-1 -mipmap,-1 -mixedtextblobs,-1 -morphology,-1 -nested_aa,-1 -nested_flipY_aa,-1 -nested_flipY_bw,-1 -new_texture_image,-1 -ninepatch-stretch,-1 -nonclosedpaths,-1 -ovals,-1 -p3_ovals,-1 -parsedpaths,-1 -patch_image,-1 -path-reverse,-1 -path_huge_crbug_800804,-1 -path_mask_cache,-1 -patheffect,-1 -pathfill,-1 -pathinterior,-1 -pathinvfill,-1 -pathopsinverse,-1 -pathopsskpclip,-1 -pdf_never_embed,-1 -persp_images,-1 -persp_shaders_aa,-1 -persp_shaders_bw,-1 -pictureimagefilter,-1 -pictureshader,-1 -pictureshader_localwrapper,-1 -pixel_snap_combo,-1 -pixel_snap_line,-1 -pixel_snap_point,-1 -pixel_snap_rect,-1 -points,-1 -poly2poly,-1 -polygons,-1 -quadcap,-1 -quadclosepath,-1 -quadpath,-1 -radial_gradient4,-1 -radial_gradient4_nodither,-1 -rectangle_texture,-1 -rects,-1 -rects_as_paths,-1 -repeated_bitmap_jpg,-1 -resizeimagefilter,-1 -rotate_imagefilter,-1 -rotatedcubicpath,-1 -roundrects,-1 -rrect,-1 -rrect_clip_aa,-1 -rrect_clip_draw_paint,-1 -rrect_draw_aa,-1 -rrect_effect,-1 -save_behind,-1 -savelayer_clipmask,-1 -savelayer_coverage,-1 -savelayer_initfromprev,-1 -savelayer_maskfilter,-1 -savelayer_with_backdrop,-1 -scaled_tilemodes_npot,-1 -scaledemoji_rendering,-1 -scaledstrokes,-1 -shadermaskfilter_gradient,-1 -shadermaskfilter_image,-1 -shadermaskfilter_localmatrix,-1 -shadertext3,-1 -shadow_utils,-1 -shadow_utils_gray,-1 -shadow_utils_occl,-1 -shadows,-1 -sharedcorners,-1 -simple-magnification,-1 -simple-offsetimagefilter,-1 -simple-polygon-offset,-1 -simpleaaclip_path,-1 -simpleaaclip_rect,-1 -simpleblurroundrect,-1 -simplerect,-1 -simpleshapes,-1 -skbug_257,-1 -skbug_4868,-1 -skbug_8664,-1 -skinning,-1 -skinning_cached,-1 -skinning_cpu,-1 -skinning_cpu_cached,-1 -smallarc,-1 -smallpaths,-1 -squarehair,-1 -stlouisarch,-1 -stringart,-1 -stroke-fill,-1 -stroke_rect_shader,-1 -strokecircle,-1 -strokedlines,-1 -strokerect,-1 -strokerects,-1 -strokes3,-1 -strokes_poly,-1 -strokes_round,-1 -stroketext,-1 -sweep_tiling,-1 -tall_stretched_bitmaps,-1 -teenyStrokes,-1 -testgradient,-1 -text_scale_skew,-1 -textblob,-1 -textblobgeometrychange,-1 -textbloblooper,-1 -textblobmixedsizes,-1 -textblobmixedsizes_df,-1 -textblobrandomfont,-1 -textblobshader,-1 -textblobtransforms,-1 -textblobuseaftergpufree,-1 -texture_domain_effect,-1 -texture_domain_effect_bilerp,-1 -texture_domain_effect_mipmap,-1 -thinconcavepaths,-1 -thinrects,-1 -thinstrokedrects,-1 -tiledscaledbitmap,-1 -tileimagefilter,-1 -tilemode_decal,-1 -tilemode_gradient,-1 -tilemodes,-1 -tilemodes_npot,-1 -tinyanglearcs,-1 -trickycubicstrokes,-1 -trimpatheffect,-1 -typefacerendering,-1 -typefacerendering_pfa,-1 -typefacerendering_pfb,-1 -typefacestyles_kerning,-1 -varied_text_clipped_lcd,-1 -varied_text_clipped_no_lcd,-1 -varied_text_ignorable_clip_lcd,-1 -varied_text_ignorable_clip_no_lcd,-1 -vertices,-1 -vertices_batching,-1 
-vertices_scaled_shader,-1 -verylarge_picture_image,-1 -verylargebitmap,-1 -windowrectangles,-1 -windowrectangles_mask,-1 -xfermodeimagefilter,-1 -xfermodes,-1 -yuv_nv12_to_rgb_effect,-1 -yuv_to_rgb_effect,-1 -zeroPath,-1 -zero_control_stroke,-1 -zero_length_paths_bw,-1 -zero_length_paths_dbl_aa,-1 -zero_length_paths_dbl_bw,-1 -zerolinestroke,-1 diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-flakes.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-flakes.txt deleted file mode 100644 index 6362cb0c7..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gl_rendertests-flakes.txt +++ /dev/null @@ -1,8 +0,0 @@ -# The following test group pass on the first 3 runs, but may flake eventually. -# This is an assumption, since those ones were set as failed by Android CTS -blur2rectsnonninepatch -bug339297_as_clip -bug6083 -cliperror -dashtextcaps -largeglyphblur diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gles_rendertests-fails.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gles_rendertests-fails.txt deleted file mode 100644 index baef60167..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gles_rendertests-fails.txt +++ /dev/null @@ -1,1377 +0,0 @@ -# Bad rendering - -arcs_as_paths,-1 -etc1,-1 -radial_gradient_precision,-1 - -# Model missing - -BlurDrawImage,-1 -CubicStroke,-1 -OverStroke,-1 -aaclip,-1 -aarectmodes,-1 -aaxfermodes,-1 -addarc,-1 -addarc_meas,-1 -analytic_antialias_convex,-1 -analytic_antialias_general,-1 -analytic_gradients,-1 -animated-image-blurs,-1 -anisotropic_hq,-1 -arccirclegap,-1 -arcofzorro,-1 -arcto,-1 -b_119394958,-1 -badpaint,-1 -bezier_conic_effects,-1 -bezier_quad_effects,-1 -beziers,-1 -big_rrect_circle_aa_effect,-1 -big_rrect_circular_corner_aa_effect,-1 -big_rrect_ellipse_aa_effect,-1 -big_rrect_elliptical_corner_aa_effect,-1 -big_rrect_rect_aa_effect,-1 -bigblurs,-1 -bigconic,-1 -bigmatrix,-1 -bigrect,-1 -bigtext,-1 -bigtileimagefilter,-1 -bitmap_subset_shader,-1 -bitmapfilters,-1 -bitmaprect_i,-1 -bitmaprect_s,-1 -bitmaptiled_fractional_horizontal,-1 -bitmaptiled_fractional_vertical,-1 -bleed,-1 -bleed_alpha_bmp,-1 -bleed_alpha_bmp_shader,-1 -bleed_alpha_image,-1 -bleed_alpha_image_shader,-1 -bleed_image,-1 -blur2rects,-1 -blur_ignore_xform_circle,-1 -blur_ignore_xform_rect,-1 -blur_ignore_xform_rrect,-1 -blur_image,-1 -blurcircles,-1 -blurimagevmask,-1 -blurquickreject,-1 -blurrects,-1 -blurredclippedcircle,-1 -blurroundrect-WH-100x100-unevenCorners,-1 -blurs,-1 -bmp_filter_quality_repeat,-1 -bug5099,-1 -bug5252,-1 -bug530095,-1 -bug615686,-1 -bug6987,-1 -c_gms,-1 -chrome_gradtext2,-1 -circle_sizes,-1 -circles,-1 -circular-clips,-1 -circular_arcs_fill,-1 -circular_arcs_hairline,-1 -circular_arcs_stroke_and_fill_butt,-1 -circular_arcs_stroke_and_fill_round,-1 -circular_arcs_stroke_and_fill_square,-1 -circular_arcs_stroke_butt,-1 -circular_arcs_stroke_round,-1 -circular_arcs_stroke_square,-1 -circular_arcs_weird,-1 -clamped_gradients,-1 -clamped_gradients_nodither,-1 -clip_strokerect,-1 -clipcubic,-1 -clippedcubic2,-1 -clockwise,-1 -colorcomposefilter_wacky,-1 -coloremoji,-1 -combo-patheffects,-1 -complexclip2_path_aa,-1 -complexclip2_rect_aa,-1 -complexclip2_rrect_aa,-1 -complexclip2_rrect_bw,-1 -complexclip3_complex,-1 -complexclip3_simple,-1 -complexclip4_aa,-1 -complexclip_aa,-1 -complexclip_aa_invert,-1 -complexclip_aa_layer,-1 -complexclip_aa_layer_invert,-1 -complexclip_bw,-1 -complexclip_bw_invert,-1 -complexclip_bw_layer,-1 -complexclip_bw_layer_invert,-1 
-composeshader_bitmap2,-1 -concavepaths,-1 -conicpaths,-1 -const_color_processor,-1 -contour_start,-1 -convex-lineonly-paths,-1 -convex-lineonly-paths-stroke-and-fill,-1 -convex-polygon-inset,-1 -convex_poly_clip,-1 -convex_poly_effect,-1 -convexpaths,-1 -crbug_640176,-1 -crbug_788500,-1 -crbug_847759,-1 -crbug_884166,-1 -crbug_887103,-1 -crbug_888453,-1 -crbug_892988,-1 -cross_context_image,-1 -cubicclosepath,-1 -cubicpath,-1 -daa,-1 -dash_line_zero_off_interval,-1 -dashcircle,-1 -dashcircle2,-1 -dashcubics,-1 -dashing,-1 -dashing2,-1 -dashing3,-1 -dashing4,-1 -dashing5_aa,-1 -dashing5_bw,-1 -degeneratesegments,-1 -dftext,-1 -dftext_blob_persp,-1 -displacement,-1 -dont_clip_to_layer,-1 -downsamplebitmap_checkerboard_high,-1 -downsamplebitmap_checkerboard_low,-1 -downsamplebitmap_checkerboard_medium,-1 -downsamplebitmap_checkerboard_none,-1 -downsamplebitmap_image_high,-1 -downsamplebitmap_image_low,-1 -downsamplebitmap_image_medium,-1 -downsamplebitmap_image_none,-1 -downsamplebitmap_text_high,-1 -downsamplebitmap_text_low,-1 -downsamplebitmap_text_medium,-1 -downsamplebitmap_text_none,-1 -draw-atlas,-1 -drawTextRSXform,-1 -draw_image_set,-1 -draw_image_set_rect_to_rect,-1 -draw_quad_set,-1 -drawable,-1 -drawbitmaprect-imagerect-subset,-1 -drawbitmaprect-subset,-1 -drawlooper,-1 -drawminibitmaprect,-1 -drawminibitmaprect_aa,-1 -drawregionmodes,-1 -dropshadowimagefilter,-1 -drrect,-1 -drrect_small_inner,-1 -dstreadshuffle,-1 -emboss,-1 -emptypath,-1 -extractbitmap,-1 -fancy_gradients,-1 -fancyblobunderline,-1 -fatpathfill,-1 -fillcircle,-1 -filltypes,-1 -filltypespersp,-1 -filterbitmap_checkerboard_192_192,-1 -filterbitmap_checkerboard_32_2,-1 -filterbitmap_checkerboard_32_32,-1 -filterbitmap_checkerboard_32_32_g8,-1 -filterbitmap_checkerboard_32_8,-1 -filterbitmap_checkerboard_4_4,-1 -filterbitmap_image_color_wheel.png,-1 -filterbitmap_image_mandrill_128.png,-1 -filterbitmap_image_mandrill_16.png,-1 -filterbitmap_image_mandrill_256.png,-1 -filterbitmap_image_mandrill_32.png,-1 -filterbitmap_image_mandrill_512.png,-1 -filterbitmap_image_mandrill_64.png,-1 -filterbitmap_image_mandrill_64.png_g8,-1 -filterbitmap_text_10.00pt,-1 -filterbitmap_text_3.00pt,-1 -filterbitmap_text_7.00pt,-1 -filterbug,-1 -filterfastbounds,-1 -filterindiabox,-1 -flippity,-1 -fontcache,-1 -fontcache-mt,-1 -fontmgr_bounds,-1 -fontmgr_bounds_0.75_0,-1 -fontmgr_bounds_1_-0.25,-1 -fontmgr_iter,-1 -fontmgr_match,-1 -fontregen,-1 -fontscaler,-1 -fontscalerdistortable,-1 -fwidth_squircle,-1 -gamma,-1 -getpostextpath,-1 -giantbitmap_clamp_bilerp_rotate,-1 -giantbitmap_clamp_bilerp_scale,-1 -giantbitmap_mirror_bilerp_rotate,-1 -giantbitmap_mirror_bilerp_scale,-1 -giantbitmap_repeat_bilerp_rotate,-1 -giantbitmap_repeat_bilerp_scale,-1 -glyph_pos_h_b,-1 -glyph_pos_h_f,-1 -glyph_pos_h_s,-1 -glyph_pos_n_b,-1 -glyph_pos_n_f,-1 -glyph_pos_n_s,-1 -gradient_dirty_laundry,-1 -gradients,-1 -gradients4f,-1 -gradients4f_nodither,-1 -gradients_2pt_conical_edge,-1 -gradients_2pt_conical_edge_mirror,-1 -gradients_2pt_conical_edge_nodither,-1 -gradients_2pt_conical_edge_repeat,-1 -gradients_2pt_conical_inside,-1 -gradients_2pt_conical_inside_mirror,-1 -gradients_2pt_conical_inside_nodither,-1 -gradients_2pt_conical_inside_repeat,-1 -gradients_2pt_conical_outside,-1 -gradients_2pt_conical_outside_mirror,-1 -gradients_2pt_conical_outside_nodither,-1 -gradients_2pt_conical_outside_repeat,-1 -gradients_degenerate_2pt,-1 -gradients_degenerate_2pt_nodither,-1 -gradients_dup_color_stops,-1 -gradients_local_perspective,-1 
-gradients_local_perspective_nodither,-1 -gradients_no_texture,-1 -gradients_no_texture_nodither,-1 -gradients_nodither,-1 -gradients_view_perspective,-1 -gradients_view_perspective_nodither,-1 -hairlines,-1 -hairmodes,-1 -hittestpath,-1 -hsl,-1 -hugebitmapshader,-1 -image-cacherator-from-picture,-1 -image-cacherator-from-raster,-1 -image-cacherator-from-texture,-1 -image-picture,-1 -image-shader,-1 -image_from_yuv_textures,-1 -image_scale_aligned,-1 -imagealphathreshold_image,-1 -imageblur,-1 -imagefilters_xfermodes,-1 -imagefiltersbase,-1 -imagefiltersclipped,-1 -imagefilterscropexpand,-1 -imagefilterscropped,-1 -imagefiltersscaled,-1 -imagefiltersstroked,-1 -imagefilterstransformed,-1 -imagemagnifier,-1 -imagemagnifier_cropped,-1 -imagemakewithfilter,-1 -imageresizetiled,-1 -imagesource,-1 -imagesrc2_low,-1 -innershapes,-1 -innershapes_bw,-1 -internal_links,-1 -inverse_paths,-1 -largecircle,-1 -lattice,-1 -lcdoverlap,-1 -lcdtext,-1 -lighting,-1 -lightingshader2,-1 -lineclosepath,-1 -linepath,-1 -localmatrixshader_nested,-1 -longlinedash,-1 -longpathdash,-1 -longwavyline,-1 -lumafilter,-1 -maddash,-1 -mandoline,-1 -manyarcs,-1 -manycircles,-1 -manyrrects,-1 -matrixconvolution,-1 -matrixconvolution_color,-1 -matriximagefilter,-1 -mipmap,-1 -mixedtextblobs,-1 -morphology,-1 -nested_aa,-1 -nested_flipY_aa,-1 -nested_flipY_bw,-1 -new_texture_image,-1 -ninepatch-stretch,-1 -nonclosedpaths,-1 -ovals,-1 -p3_ovals,-1 -parsedpaths,-1 -patch_image,-1 -path-reverse,-1 -path_huge_crbug_800804,-1 -path_mask_cache,-1 -patheffect,-1 -pathfill,-1 -pathinterior,-1 -pathinvfill,-1 -pathopsinverse,-1 -pathopsskpclip,-1 -pdf_never_embed,-1 -persp_images,-1 -persp_shaders_aa,-1 -persp_shaders_bw,-1 -pictureimagefilter,-1 -pictureshader,-1 -pictureshader_localwrapper,-1 -pixel_snap_combo,-1 -pixel_snap_line,-1 -pixel_snap_point,-1 -pixel_snap_rect,-1 -points,-1 -poly2poly,-1 -polygons,-1 -quadcap,-1 -quadclosepath,-1 -quadpath,-1 -radial_gradient4,-1 -radial_gradient4_nodither,-1 -rectangle_texture,-1 -rects,-1 -rects_as_paths,-1 -repeated_bitmap_jpg,-1 -resizeimagefilter,-1 -rotate_imagefilter,-1 -rotatedcubicpath,-1 -roundrects,-1 -rrect,-1 -rrect_clip_aa,-1 -rrect_clip_draw_paint,-1 -rrect_draw_aa,-1 -rrect_effect,-1 -save_behind,-1 -savelayer_clipmask,-1 -savelayer_coverage,-1 -savelayer_initfromprev,-1 -savelayer_maskfilter,-1 -savelayer_with_backdrop,-1 -scaled_tilemodes_npot,-1 -scaledemoji_rendering,-1 -scaledstrokes,-1 -shadermaskfilter_gradient,-1 -shadermaskfilter_image,-1 -shadermaskfilter_localmatrix,-1 -shadertext3,-1 -shadow_utils,-1 -shadow_utils_gray,-1 -shadow_utils_occl,-1 -shadows,-1 -sharedcorners,-1 -simple-magnification,-1 -simple-offsetimagefilter,-1 -simple-polygon-offset,-1 -simpleaaclip_path,-1 -simpleaaclip_rect,-1 -simpleblurroundrect,-1 -simplerect,-1 -simpleshapes,-1 -skbug_257,-1 -skbug_4868,-1 -skbug_8664,-1 -skinning,-1 -skinning_cached,-1 -skinning_cpu,-1 -skinning_cpu_cached,-1 -smallarc,-1 -smallpaths,-1 -squarehair,-1 -stlouisarch,-1 -stringart,-1 -stroke-fill,-1 -stroke_rect_shader,-1 -strokecircle,-1 -strokedlines,-1 -strokerect,-1 -strokerects,-1 -strokes3,-1 -strokes_poly,-1 -strokes_round,-1 -stroketext,-1 -sweep_tiling,-1 -tall_stretched_bitmaps,-1 -teenyStrokes,-1 -testgradient,-1 -text_scale_skew,-1 -textblob,-1 -textblobgeometrychange,-1 -textbloblooper,-1 -textblobmixedsizes,-1 -textblobmixedsizes_df,-1 -textblobrandomfont,-1 -textblobshader,-1 -textblobtransforms,-1 -textblobuseaftergpufree,-1 -texture_domain_effect,-1 -texture_domain_effect_bilerp,-1 
-texture_domain_effect_mipmap,-1 -thinconcavepaths,-1 -thinrects,-1 -thinstrokedrects,-1 -tiledscaledbitmap,-1 -tileimagefilter,-1 -tilemode_decal,-1 -tilemode_gradient,-1 -tilemodes,-1 -tilemodes_npot,-1 -tinyanglearcs,-1 -trickycubicstrokes,-1 -trimpatheffect,-1 -typefacerendering,-1 -typefacerendering_pfa,-1 -typefacerendering_pfb,-1 -typefacestyles_kerning,-1 -varied_text_clipped_lcd,-1 -varied_text_clipped_no_lcd,-1 -varied_text_ignorable_clip_lcd,-1 -varied_text_ignorable_clip_no_lcd,-1 -vertices,-1 -vertices_batching,-1 -vertices_scaled_shader,-1 -verylarge_picture_image,-1 -verylargebitmap,-1 -windowrectangles,-1 -windowrectangles_mask,-1 -xfermodeimagefilter,-1 -xfermodes,-1 -yuv_nv12_to_rgb_effect,-1 -yuv_to_rgb_effect,-1 -zeroPath,-1 -zero_control_stroke,-1 -zero_length_paths_bw,-1 -zero_length_paths_dbl_aa,-1 -zero_length_paths_dbl_bw,-1 -zerolinestroke,-1 -windowrectangles_mask,-1 -s_BlurDrawImage,-1 -s_CubicStroke,-1 -s_OverStroke,-1 -s_aaclip,-1 -s_aarectmodes,-1 -s_aaxfermodes,-1 -s_addarc,-1 -s_addarc_meas,-1 -s_analytic_antialias_convex,-1 -s_analytic_antialias_general,-1 -s_analytic_gradients,-1 -s_animated-image-blurs,-1 -s_anisotropic_hq,-1 -s_arccirclegap,-1 -s_arcofzorro,-1 -s_arcto,-1 -s_b_119394958,-1 -s_badpaint,-1 -s_bezier_conic_effects,-1 -s_bezier_quad_effects,-1 -s_beziers,-1 -s_big_rrect_circle_aa_effect,-1 -s_big_rrect_circular_corner_aa_effect,-1 -s_big_rrect_ellipse_aa_effect,-1 -s_big_rrect_elliptical_corner_aa_effect,-1 -s_big_rrect_rect_aa_effect,-1 -s_bigblurs,-1 -s_bigconic,-1 -s_bigmatrix,-1 -s_bigrect,-1 -s_bigtext,-1 -s_bigtileimagefilter,-1 -s_bitmap_subset_shader,-1 -s_bitmapfilters,-1 -s_bitmaprect_i,-1 -s_bitmaprect_s,-1 -s_bitmaptiled_fractional_horizontal,-1 -s_bitmaptiled_fractional_vertical,-1 -s_bleed,-1 -s_bleed_alpha_bmp,-1 -s_bleed_alpha_bmp_shader,-1 -s_bleed_alpha_image,-1 -s_bleed_alpha_image_shader,-1 -s_bleed_image,-1 -s_blur2rects,-1 -s_blur_ignore_xform_circle,-1 -s_blur_ignore_xform_rect,-1 -s_blur_ignore_xform_rrect,-1 -s_blur_image,-1 -s_blurcircles,-1 -s_blurimagevmask,-1 -s_blurquickreject,-1 -s_blurrects,-1 -s_blurredclippedcircle,-1 -s_blurroundrect-WH-100x100-unevenCorners,-1 -s_blurs,-1 -s_bmp_filter_quality_repeat,-1 -s_bug5099,-1 -s_bug5252,-1 -s_bug530095,-1 -s_bug615686,-1 -s_bug6987,-1 -s_c_gms,-1 -s_chrome_gradtext2,-1 -s_circle_sizes,-1 -s_circles,-1 -s_circular-clips,-1 -s_circular_arcs_fill,-1 -s_circular_arcs_hairline,-1 -s_circular_arcs_stroke_and_fill_butt,-1 -s_circular_arcs_stroke_and_fill_round,-1 -s_circular_arcs_stroke_and_fill_square,-1 -s_circular_arcs_stroke_butt,-1 -s_circular_arcs_stroke_round,-1 -s_circular_arcs_stroke_square,-1 -s_circular_arcs_weird,-1 -s_clamped_gradients,-1 -s_clamped_gradients_nodither,-1 -s_clip_strokerect,-1 -s_clipcubic,-1 -s_clippedcubic2,-1 -s_clockwise,-1 -s_colorcomposefilter_wacky,-1 -s_coloremoji,-1 -s_combo-patheffects,-1 -s_complexclip2_path_aa,-1 -s_complexclip2_rect_aa,-1 -s_complexclip2_rrect_aa,-1 -s_complexclip2_rrect_bw,-1 -s_complexclip3_complex,-1 -s_complexclip3_simple,-1 -s_complexclip4_aa,-1 -s_complexclip_aa,-1 -s_complexclip_aa_invert,-1 -s_complexclip_aa_layer,-1 -s_complexclip_aa_layer_invert,-1 -s_complexclip_bw,-1 -s_complexclip_bw_invert,-1 -s_complexclip_bw_layer,-1 -s_complexclip_bw_layer_invert,-1 -s_composeshader_bitmap2,-1 -s_concavepaths,-1 -s_conicpaths,-1 -s_const_color_processor,-1 -s_contour_start,-1 -s_convex-lineonly-paths,-1 -s_convex-lineonly-paths-stroke-and-fill,-1 -s_convex-polygon-inset,-1 -s_convex_poly_clip,-1 
-s_convex_poly_effect,-1 -s_convexpaths,-1 -s_crbug_640176,-1 -s_crbug_788500,-1 -s_crbug_847759,-1 -s_crbug_884166,-1 -s_crbug_887103,-1 -s_crbug_888453,-1 -s_crbug_892988,-1 -s_cross_context_image,-1 -s_cubicclosepath,-1 -s_cubicpath,-1 -s_daa,-1 -s_dash_line_zero_off_interval,-1 -s_dashcircle,-1 -s_dashcircle2,-1 -s_dashcubics,-1 -s_dashing,-1 -s_dashing2,-1 -s_dashing3,-1 -s_dashing4,-1 -s_dashing5_aa,-1 -s_dashing5_bw,-1 -s_degeneratesegments,-1 -s_dftext,-1 -s_dftext_blob_persp,-1 -s_displacement,-1 -s_dont_clip_to_layer,-1 -s_downsamplebitmap_checkerboard_high,-1 -s_downsamplebitmap_checkerboard_low,-1 -s_downsamplebitmap_checkerboard_medium,-1 -s_downsamplebitmap_checkerboard_none,-1 -s_downsamplebitmap_image_high,-1 -s_downsamplebitmap_image_low,-1 -s_downsamplebitmap_image_medium,-1 -s_downsamplebitmap_image_none,-1 -s_downsamplebitmap_text_high,-1 -s_downsamplebitmap_text_low,-1 -s_downsamplebitmap_text_medium,-1 -s_downsamplebitmap_text_none,-1 -s_draw-atlas,-1 -s_drawTextRSXform,-1 -s_draw_image_set,-1 -s_draw_image_set_rect_to_rect,-1 -s_draw_quad_set,-1 -s_drawable,-1 -s_drawbitmaprect-imagerect-subset,-1 -s_drawbitmaprect-subset,-1 -s_drawlooper,-1 -s_drawminibitmaprect,-1 -s_drawminibitmaprect_aa,-1 -s_drawregionmodes,-1 -s_dropshadowimagefilter,-1 -s_drrect,-1 -s_drrect_small_inner,-1 -s_dstreadshuffle,-1 -s_emboss,-1 -s_emptypath,-1 -s_extractbitmap,-1 -s_fancy_gradients,-1 -s_fancyblobunderline,-1 -s_fatpathfill,-1 -s_fillcircle,-1 -s_filltypes,-1 -s_filltypespersp,-1 -s_filterbitmap_checkerboard_192_192,-1 -s_filterbitmap_checkerboard_32_2,-1 -s_filterbitmap_checkerboard_32_32,-1 -s_filterbitmap_checkerboard_32_32_g8,-1 -s_filterbitmap_checkerboard_32_8,-1 -s_filterbitmap_checkerboard_4_4,-1 -s_filterbitmap_image_color_wheel.png,-1 -s_filterbitmap_image_mandrill_128.png,-1 -s_filterbitmap_image_mandrill_16.png,-1 -s_filterbitmap_image_mandrill_256.png,-1 -s_filterbitmap_image_mandrill_32.png,-1 -s_filterbitmap_image_mandrill_512.png,-1 -s_filterbitmap_image_mandrill_64.png,-1 -s_filterbitmap_image_mandrill_64.png_g8,-1 -s_filterbitmap_text_10.00pt,-1 -s_filterbitmap_text_3.00pt,-1 -s_filterbitmap_text_7.00pt,-1 -s_filterbug,-1 -s_filterfastbounds,-1 -s_filterindiabox,-1 -s_flippity,-1 -s_fontcache,-1 -s_fontcache-mt,-1 -s_fontmgr_bounds,-1 -s_fontmgr_bounds_0.75_0,-1 -s_fontmgr_bounds_1_-0.25,-1 -s_fontmgr_iter,-1 -s_fontmgr_match,-1 -s_fontregen,-1 -s_fontscaler,-1 -s_fontscalerdistortable,-1 -s_fwidth_squircle,-1 -s_gamma,-1 -s_getpostextpath,-1 -s_giantbitmap_clamp_bilerp_rotate,-1 -s_giantbitmap_clamp_bilerp_scale,-1 -s_giantbitmap_mirror_bilerp_rotate,-1 -s_giantbitmap_mirror_bilerp_scale,-1 -s_giantbitmap_repeat_bilerp_rotate,-1 -s_giantbitmap_repeat_bilerp_scale,-1 -s_glyph_pos_h_b,-1 -s_glyph_pos_h_f,-1 -s_glyph_pos_h_s,-1 -s_glyph_pos_n_b,-1 -s_glyph_pos_n_f,-1 -s_glyph_pos_n_s,-1 -s_gradient_dirty_laundry,-1 -s_gradients,-1 -s_gradients4f,-1 -s_gradients4f_nodither,-1 -s_gradients_2pt_conical_edge,-1 -s_gradients_2pt_conical_edge_mirror,-1 -s_gradients_2pt_conical_edge_nodither,-1 -s_gradients_2pt_conical_edge_repeat,-1 -s_gradients_2pt_conical_inside,-1 -s_gradients_2pt_conical_inside_mirror,-1 -s_gradients_2pt_conical_inside_nodither,-1 -s_gradients_2pt_conical_inside_repeat,-1 -s_gradients_2pt_conical_outside,-1 -s_gradients_2pt_conical_outside_mirror,-1 -s_gradients_2pt_conical_outside_nodither,-1 -s_gradients_2pt_conical_outside_repeat,-1 -s_gradients_degenerate_2pt,-1 -s_gradients_degenerate_2pt_nodither,-1 -s_gradients_dup_color_stops,-1 
-s_gradients_local_perspective,-1 -s_gradients_local_perspective_nodither,-1 -s_gradients_no_texture,-1 -s_gradients_no_texture_nodither,-1 -s_gradients_nodither,-1 -s_gradients_view_perspective,-1 -s_gradients_view_perspective_nodither,-1 -s_hairlines,-1 -s_hairmodes,-1 -s_hittestpath,-1 -s_hsl,-1 -s_hugebitmapshader,-1 -s_image-cacherator-from-picture,-1 -s_image-cacherator-from-raster,-1 -s_image-cacherator-from-texture,-1 -s_image-picture,-1 -s_image-shader,-1 -s_image_from_yuv_textures,-1 -s_image_scale_aligned,-1 -s_imagealphathreshold_image,-1 -s_imageblur,-1 -s_imagefilters_xfermodes,-1 -s_imagefiltersbase,-1 -s_imagefiltersclipped,-1 -s_imagefilterscropexpand,-1 -s_imagefilterscropped,-1 -s_imagefiltersscaled,-1 -s_imagefiltersstroked,-1 -s_imagefilterstransformed,-1 -s_imagemagnifier,-1 -s_imagemagnifier_cropped,-1 -s_imagemakewithfilter,-1 -s_imageresizetiled,-1 -s_imagesource,-1 -s_imagesrc2_low,-1 -s_innershapes,-1 -s_innershapes_bw,-1 -s_internal_links,-1 -s_inverse_paths,-1 -s_largecircle,-1 -s_lattice,-1 -s_lcdoverlap,-1 -s_lcdtext,-1 -s_lighting,-1 -s_lightingshader2,-1 -s_lineclosepath,-1 -s_linepath,-1 -s_localmatrixshader_nested,-1 -s_longlinedash,-1 -s_longpathdash,-1 -s_longwavyline,-1 -s_lumafilter,-1 -s_maddash,-1 -s_mandoline,-1 -s_manyarcs,-1 -s_manycircles,-1 -s_manyrrects,-1 -s_matrixconvolution,-1 -s_matrixconvolution_color,-1 -s_matriximagefilter,-1 -s_mipmap,-1 -s_mixedtextblobs,-1 -s_morphology,-1 -s_nested_aa,-1 -s_nested_flipY_aa,-1 -s_nested_flipY_bw,-1 -s_new_texture_image,-1 -s_ninepatch-stretch,-1 -s_nonclosedpaths,-1 -s_ovals,-1 -s_p3_ovals,-1 -s_parsedpaths,-1 -s_patch_image,-1 -s_path-reverse,-1 -s_path_huge_crbug_800804,-1 -s_path_mask_cache,-1 -s_patheffect,-1 -s_pathfill,-1 -s_pathinterior,-1 -s_pathinvfill,-1 -s_pathopsinverse,-1 -s_pathopsskpclip,-1 -s_pdf_never_embed,-1 -s_persp_images,-1 -s_persp_shaders_aa,-1 -s_persp_shaders_bw,-1 -s_pictureimagefilter,-1 -s_pictureshader,-1 -s_pictureshader_localwrapper,-1 -s_pixel_snap_combo,-1 -s_pixel_snap_line,-1 -s_pixel_snap_point,-1 -s_pixel_snap_rect,-1 -s_points,-1 -s_poly2poly,-1 -s_polygons,-1 -s_quadcap,-1 -s_quadclosepath,-1 -s_quadpath,-1 -s_radial_gradient4,-1 -s_radial_gradient4_nodither,-1 -s_rectangle_texture,-1 -s_rects,-1 -s_rects_as_paths,-1 -s_repeated_bitmap_jpg,-1 -s_resizeimagefilter,-1 -s_rotate_imagefilter,-1 -s_rotatedcubicpath,-1 -s_roundrects,-1 -s_rrect,-1 -s_rrect_clip_aa,-1 -s_rrect_clip_draw_paint,-1 -s_rrect_draw_aa,-1 -s_rrect_effect,-1 -s_save_behind,-1 -s_savelayer_clipmask,-1 -s_savelayer_coverage,-1 -s_savelayer_initfromprev,-1 -s_savelayer_maskfilter,-1 -s_savelayer_with_backdrop,-1 -s_scaled_tilemodes_npot,-1 -s_scaledemoji_rendering,-1 -s_scaledstrokes,-1 -s_shadermaskfilter_gradient,-1 -s_shadermaskfilter_image,-1 -s_shadermaskfilter_localmatrix,-1 -s_shadertext3,-1 -s_shadow_utils,-1 -s_shadow_utils_gray,-1 -s_shadow_utils_occl,-1 -s_shadows,-1 -s_sharedcorners,-1 -s_simple-magnification,-1 -s_simple-offsetimagefilter,-1 -s_simple-polygon-offset,-1 -s_simpleaaclip_path,-1 -s_simpleaaclip_rect,-1 -s_simpleblurroundrect,-1 -s_simplerect,-1 -s_simpleshapes,-1 -s_skbug_257,-1 -s_skbug_4868,-1 -s_skbug_8664,-1 -s_skinning,-1 -s_skinning_cached,-1 -s_skinning_cpu,-1 -s_skinning_cpu_cached,-1 -s_smallarc,-1 -s_smallpaths,-1 -s_squarehair,-1 -s_stlouisarch,-1 -s_stringart,-1 -s_stroke-fill,-1 -s_stroke_rect_shader,-1 -s_strokecircle,-1 -s_strokedlines,-1 -s_strokerect,-1 -s_strokerects,-1 -s_strokes3,-1 -s_strokes_poly,-1 -s_strokes_round,-1 -s_stroketext,-1 
-s_sweep_tiling,-1 -s_tall_stretched_bitmaps,-1 -s_teenyStrokes,-1 -s_testgradient,-1 -s_text_scale_skew,-1 -s_textblob,-1 -s_textblobgeometrychange,-1 -s_textbloblooper,-1 -s_textblobmixedsizes,-1 -s_textblobmixedsizes_df,-1 -s_textblobrandomfont,-1 -s_textblobshader,-1 -s_textblobtransforms,-1 -s_textblobuseaftergpufree,-1 -s_texture_domain_effect,-1 -s_texture_domain_effect_bilerp,-1 -s_texture_domain_effect_mipmap,-1 -s_thinconcavepaths,-1 -s_thinrects,-1 -s_thinstrokedrects,-1 -s_tiledscaledbitmap,-1 -s_tileimagefilter,-1 -s_tilemode_decal,-1 -s_tilemode_gradient,-1 -s_tilemodes,-1 -s_tilemodes_npot,-1 -s_tinyanglearcs,-1 -s_trickycubicstrokes,-1 -s_trimpatheffect,-1 -s_typefacerendering,-1 -s_typefacerendering_pfa,-1 -s_typefacerendering_pfb,-1 -s_typefacestyles_kerning,-1 -s_varied_text_clipped_lcd,-1 -s_varied_text_clipped_no_lcd,-1 -s_varied_text_ignorable_clip_lcd,-1 -s_varied_text_ignorable_clip_no_lcd,-1 -s_vertices,-1 -s_vertices_batching,-1 -s_vertices_scaled_shader,-1 -s_verylarge_picture_image,-1 -s_verylargebitmap,-1 -s_windowrectangles,-1 -s_windowrectangles_mask,-1 -s_xfermodeimagefilter,-1 -s_xfermodes,-1 -s_yuv_nv12_to_rgb_effect,-1 -s_yuv_to_rgb_effect,-1 -s_zeroPath,-1 -s_zero_control_stroke,-1 -s_zero_length_paths_bw,-1 -s_zero_length_paths_dbl_aa,-1 -s_zero_length_paths_dbl_bw,-1 -s_zerolinestroke,-1 -windowrectangles_mask,-1 -BlurDrawImage,-1 -CubicStroke,-1 -OverStroke,-1 -aaclip,-1 -aarectmodes,-1 -aaxfermodes,-1 -addarc,-1 -addarc_meas,-1 -analytic_antialias_convex,-1 -analytic_antialias_general,-1 -analytic_gradients,-1 -animated-image-blurs,-1 -anisotropic_hq,-1 -arccirclegap,-1 -arcofzorro,-1 -arcto,-1 -b_119394958,-1 -badpaint,-1 -bezier_conic_effects,-1 -bezier_quad_effects,-1 -beziers,-1 -big_rrect_circle_aa_effect,-1 -big_rrect_circular_corner_aa_effect,-1 -big_rrect_ellipse_aa_effect,-1 -big_rrect_elliptical_corner_aa_effect,-1 -big_rrect_rect_aa_effect,-1 -bigblurs,-1 -bigconic,-1 -bigmatrix,-1 -bigrect,-1 -bigtext,-1 -bigtileimagefilter,-1 -bitmap_subset_shader,-1 -bitmapfilters,-1 -bitmaprect_i,-1 -bitmaprect_s,-1 -bitmaptiled_fractional_horizontal,-1 -bitmaptiled_fractional_vertical,-1 -bleed,-1 -bleed_alpha_bmp,-1 -bleed_alpha_bmp_shader,-1 -bleed_alpha_image,-1 -bleed_alpha_image_shader,-1 -bleed_image,-1 -blur2rects,-1 -blur_ignore_xform_circle,-1 -blur_ignore_xform_rect,-1 -blur_ignore_xform_rrect,-1 -blur_image,-1 -blurcircles,-1 -blurimagevmask,-1 -blurquickreject,-1 -blurrects,-1 -blurredclippedcircle,-1 -blurroundrect-WH-100x100-unevenCorners,-1 -blurs,-1 -bmp_filter_quality_repeat,-1 -bug5099,-1 -bug5252,-1 -bug530095,-1 -bug615686,-1 -bug6987,-1 -c_gms,-1 -chrome_gradtext2,-1 -circle_sizes,-1 -circles,-1 -circular-clips,-1 -circular_arcs_fill,-1 -circular_arcs_hairline,-1 -circular_arcs_stroke_and_fill_butt,-1 -circular_arcs_stroke_and_fill_round,-1 -circular_arcs_stroke_and_fill_square,-1 -circular_arcs_stroke_butt,-1 -circular_arcs_stroke_round,-1 -circular_arcs_stroke_square,-1 -circular_arcs_weird,-1 -clamped_gradients,-1 -clamped_gradients_nodither,-1 -clip_strokerect,-1 -clipcubic,-1 -clippedcubic2,-1 -clockwise,-1 -colorcomposefilter_wacky,-1 -coloremoji,-1 -combo-patheffects,-1 -complexclip2_path_aa,-1 -complexclip2_rect_aa,-1 -complexclip2_rrect_aa,-1 -complexclip2_rrect_bw,-1 -complexclip3_complex,-1 -complexclip3_simple,-1 -complexclip4_aa,-1 -complexclip_aa,-1 -complexclip_aa_invert,-1 -complexclip_aa_layer,-1 -complexclip_aa_layer_invert,-1 -complexclip_bw,-1 -complexclip_bw_invert,-1 -complexclip_bw_layer,-1 
-complexclip_bw_layer_invert,-1 -composeshader_bitmap2,-1 -concavepaths,-1 -conicpaths,-1 -const_color_processor,-1 -contour_start,-1 -convex-lineonly-paths,-1 -convex-lineonly-paths-stroke-and-fill,-1 -convex-polygon-inset,-1 -convex_poly_clip,-1 -convex_poly_effect,-1 -convexpaths,-1 -crbug_640176,-1 -crbug_788500,-1 -crbug_847759,-1 -crbug_884166,-1 -crbug_887103,-1 -crbug_888453,-1 -crbug_892988,-1 -cross_context_image,-1 -cubicclosepath,-1 -cubicpath,-1 -daa,-1 -dash_line_zero_off_interval,-1 -dashcircle,-1 -dashcircle2,-1 -dashcubics,-1 -dashing,-1 -dashing2,-1 -dashing3,-1 -dashing4,-1 -dashing5_aa,-1 -dashing5_bw,-1 -degeneratesegments,-1 -dftext,-1 -dftext_blob_persp,-1 -displacement,-1 -dont_clip_to_layer,-1 -downsamplebitmap_checkerboard_high,-1 -downsamplebitmap_checkerboard_low,-1 -downsamplebitmap_checkerboard_medium,-1 -downsamplebitmap_checkerboard_none,-1 -downsamplebitmap_image_high,-1 -downsamplebitmap_image_low,-1 -downsamplebitmap_image_medium,-1 -downsamplebitmap_image_none,-1 -downsamplebitmap_text_high,-1 -downsamplebitmap_text_low,-1 -downsamplebitmap_text_medium,-1 -downsamplebitmap_text_none,-1 -draw-atlas,-1 -drawTextRSXform,-1 -draw_image_set,-1 -draw_image_set_rect_to_rect,-1 -draw_quad_set,-1 -drawable,-1 -drawbitmaprect-imagerect-subset,-1 -drawbitmaprect-subset,-1 -drawlooper,-1 -drawminibitmaprect,-1 -drawminibitmaprect_aa,-1 -drawregionmodes,-1 -dropshadowimagefilter,-1 -drrect,-1 -drrect_small_inner,-1 -dstreadshuffle,-1 -emboss,-1 -emptypath,-1 -extractbitmap,-1 -fancy_gradients,-1 -fancyblobunderline,-1 -fatpathfill,-1 -fillcircle,-1 -filltypes,-1 -filltypespersp,-1 -filterbitmap_checkerboard_192_192,-1 -filterbitmap_checkerboard_32_2,-1 -filterbitmap_checkerboard_32_32,-1 -filterbitmap_checkerboard_32_32_g8,-1 -filterbitmap_checkerboard_32_8,-1 -filterbitmap_checkerboard_4_4,-1 -filterbitmap_image_color_wheel.png,-1 -filterbitmap_image_mandrill_128.png,-1 -filterbitmap_image_mandrill_16.png,-1 -filterbitmap_image_mandrill_256.png,-1 -filterbitmap_image_mandrill_32.png,-1 -filterbitmap_image_mandrill_512.png,-1 -filterbitmap_image_mandrill_64.png,-1 -filterbitmap_image_mandrill_64.png_g8,-1 -filterbitmap_text_10.00pt,-1 -filterbitmap_text_3.00pt,-1 -filterbitmap_text_7.00pt,-1 -filterbug,-1 -filterfastbounds,-1 -filterindiabox,-1 -flippity,-1 -fontcache,-1 -fontcache-mt,-1 -fontmgr_bounds,-1 -fontmgr_bounds_0.75_0,-1 -fontmgr_bounds_1_-0.25,-1 -fontmgr_iter,-1 -fontmgr_match,-1 -fontregen,-1 -fontscaler,-1 -fontscalerdistortable,-1 -fwidth_squircle,-1 -gamma,-1 -getpostextpath,-1 -giantbitmap_clamp_bilerp_rotate,-1 -giantbitmap_clamp_bilerp_scale,-1 -giantbitmap_mirror_bilerp_rotate,-1 -giantbitmap_mirror_bilerp_scale,-1 -giantbitmap_repeat_bilerp_rotate,-1 -giantbitmap_repeat_bilerp_scale,-1 -glyph_pos_h_b,-1 -glyph_pos_h_f,-1 -glyph_pos_h_s,-1 -glyph_pos_n_b,-1 -glyph_pos_n_f,-1 -glyph_pos_n_s,-1 -gradient_dirty_laundry,-1 -gradients,-1 -gradients4f,-1 -gradients4f_nodither,-1 -gradients_2pt_conical_edge,-1 -gradients_2pt_conical_edge_mirror,-1 -gradients_2pt_conical_edge_nodither,-1 -gradients_2pt_conical_edge_repeat,-1 -gradients_2pt_conical_inside,-1 -gradients_2pt_conical_inside_mirror,-1 -gradients_2pt_conical_inside_nodither,-1 -gradients_2pt_conical_inside_repeat,-1 -gradients_2pt_conical_outside,-1 -gradients_2pt_conical_outside_mirror,-1 -gradients_2pt_conical_outside_nodither,-1 -gradients_2pt_conical_outside_repeat,-1 -gradients_degenerate_2pt,-1 -gradients_degenerate_2pt_nodither,-1 -gradients_dup_color_stops,-1 
-gradients_local_perspective,-1 -gradients_local_perspective_nodither,-1 -gradients_no_texture,-1 -gradients_no_texture_nodither,-1 -gradients_nodither,-1 -gradients_view_perspective,-1 -gradients_view_perspective_nodither,-1 -hairlines,-1 -hairmodes,-1 -hittestpath,-1 -hsl,-1 -hugebitmapshader,-1 -image-cacherator-from-picture,-1 -image-cacherator-from-raster,-1 -image-cacherator-from-texture,-1 -image-picture,-1 -image-shader,-1 -image_from_yuv_textures,-1 -image_scale_aligned,-1 -imagealphathreshold_image,-1 -imageblur,-1 -# A few pixels at the edge of the blur, nothing more noticeable at those points than the jaggedness elsewhere. -imageblurclampmode,180 -imagefilters_xfermodes,-1 -imagefiltersbase,-1 -imagefiltersclipped,-1 -imagefilterscropexpand,-1 -imagefilterscropped,-1 -imagefiltersscaled,-1 -imagefiltersstroked,-1 -imagefilterstransformed,-1 -imagemagnifier,-1 -imagemagnifier_cropped,-1 -imagemakewithfilter,-1 -imageresizetiled,-1 -imagesource,-1 -imagesrc2_low,-1 -innershapes,-1 -innershapes_bw,-1 -internal_links,-1 -inverse_paths,-1 -largecircle,-1 -lattice,-1 -lcdoverlap,-1 -lcdtext,-1 -lighting,-1 -lightingshader2,-1 -lineclosepath,-1 -linepath,-1 -localmatrixshader_nested,-1 -longlinedash,-1 -longpathdash,-1 -longwavyline,-1 -lumafilter,-1 -maddash,-1 -mandoline,-1 -manyarcs,-1 -manycircles,-1 -manyrrects,-1 -matrixconvolution,-1 -matrixconvolution_color,-1 -matriximagefilter,-1 -mipmap,-1 -mixedtextblobs,-1 -morphology,-1 -nested_aa,-1 -nested_flipY_aa,-1 -nested_flipY_bw,-1 -new_texture_image,-1 -ninepatch-stretch,-1 -nonclosedpaths,-1 -ovals,-1 -p3_ovals,-1 -parsedpaths,-1 -patch_image,-1 -path-reverse,-1 -path_huge_crbug_800804,-1 -path_mask_cache,-1 -patheffect,-1 -pathfill,-1 -pathinterior,-1 -pathinvfill,-1 -pathopsinverse,-1 -pathopsskpclip,-1 -pdf_never_embed,-1 -persp_images,-1 -persp_shaders_aa,-1 -persp_shaders_bw,-1 -pictureimagefilter,-1 -pictureshader,-1 -pictureshader_localwrapper,-1 -pixel_snap_combo,-1 -pixel_snap_line,-1 -pixel_snap_point,-1 -pixel_snap_rect,-1 -points,-1 -poly2poly,-1 -polygons,-1 -quadcap,-1 -quadclosepath,-1 -quadpath,-1 -radial_gradient4,-1 -radial_gradient4_nodither,-1 -rectangle_texture,-1 -rects,-1 -rects_as_paths,-1 -repeated_bitmap_jpg,-1 -resizeimagefilter,-1 -rotate_imagefilter,-1 -rotatedcubicpath,-1 -roundrects,-1 -rrect,-1 -rrect_clip_aa,-1 -rrect_clip_draw_paint,-1 -rrect_draw_aa,-1 -rrect_effect,-1 -save_behind,-1 -savelayer_clipmask,-1 -savelayer_coverage,-1 -savelayer_initfromprev,-1 -savelayer_maskfilter,-1 -savelayer_with_backdrop,-1 -scaled_tilemodes_npot,-1 -scaledemoji_rendering,-1 -scaledstrokes,-1 -shadermaskfilter_gradient,-1 -shadermaskfilter_image,-1 -shadermaskfilter_localmatrix,-1 -shadertext3,-1 -shadow_utils,-1 -shadow_utils_gray,-1 -shadow_utils_occl,-1 -shadows,-1 -sharedcorners,-1 -simple-magnification,-1 -simple-offsetimagefilter,-1 -simple-polygon-offset,-1 -simpleaaclip_path,-1 -simpleaaclip_rect,-1 -simpleblurroundrect,-1 -simplerect,-1 -simpleshapes,-1 -skbug_257,-1 -skbug_4868,-1 -skbug_8664,-1 -skinning,-1 -skinning_cached,-1 -skinning_cpu,-1 -skinning_cpu_cached,-1 -smallarc,-1 -smallpaths,-1 -squarehair,-1 -stlouisarch,-1 -stringart,-1 -stroke-fill,-1 -stroke_rect_shader,-1 -strokecircle,-1 -strokedlines,-1 -strokerect,-1 -strokerects,-1 -strokes3,-1 -strokes_poly,-1 -strokes_round,-1 -stroketext,-1 -sweep_tiling,-1 -tall_stretched_bitmaps,-1 -teenyStrokes,-1 -testgradient,-1 -text_scale_skew,-1 -textblob,-1 -textblobgeometrychange,-1 -textbloblooper,-1 -textblobmixedsizes,-1 
-textblobmixedsizes_df,-1 -textblobrandomfont,-1 -textblobshader,-1 -textblobtransforms,-1 -textblobuseaftergpufree,-1 -texture_domain_effect,-1 -texture_domain_effect_bilerp,-1 -texture_domain_effect_mipmap,-1 -thinconcavepaths,-1 -thinrects,-1 -thinstrokedrects,-1 -tiledscaledbitmap,-1 -tileimagefilter,-1 -tilemode_decal,-1 -tilemode_gradient,-1 -tilemodes,-1 -tilemodes_npot,-1 -tinyanglearcs,-1 -trickycubicstrokes,-1 -trimpatheffect,-1 -typefacerendering,-1 -typefacerendering_pfa,-1 -typefacerendering_pfb,-1 -typefacestyles_kerning,-1 -varied_text_clipped_lcd,-1 -varied_text_clipped_no_lcd,-1 -varied_text_ignorable_clip_lcd,-1 -varied_text_ignorable_clip_no_lcd,-1 -vertices,-1 -vertices_batching,-1 -vertices_scaled_shader,-1 -verylarge_picture_image,-1 -verylargebitmap,-1 -windowrectangles,-1 -windowrectangles_mask,-1 -xfermodeimagefilter,-1 -xfermodes,-1 -yuv_nv12_to_rgb_effect,-1 -yuv_to_rgb_effect,-1 -zeroPath,-1 -zero_control_stroke,-1 -zero_length_paths_bw,-1 -zero_length_paths_dbl_aa,-1 -zero_length_paths_dbl_bw,-1 -zerolinestroke,-1 diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gles_rendertests-flakes.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gles_rendertests-flakes.txt deleted file mode 100644 index 6362cb0c7..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-gles_rendertests-flakes.txt +++ /dev/null @@ -1,8 +0,0 @@ -# The following test group pass on the first 3 runs, but may flake eventually. -# This is an assumption, since those ones were set as failed by Android CTS -blur2rectsnonninepatch -bug339297_as_clip -bug6083 -cliperror -dashtextcaps -largeglyphblur diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-vk_rendertests-fails.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-vk_rendertests-fails.txt deleted file mode 100644 index 2f52c09c6..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-vk_rendertests-fails.txt +++ /dev/null @@ -1,1379 +0,0 @@ -# Acceptable error - -# Two pixels with wrong color -ovals_as_paths,24 - -# Bad rendering - -arcs_as_paths,-1 -etc1,-1 - -# Model missing - -BlurDrawImage,-1 -CubicStroke,-1 -OverStroke,-1 -aaclip,-1 -aarectmodes,-1 -aaxfermodes,-1 -addarc,-1 -addarc_meas,-1 -analytic_antialias_convex,-1 -analytic_antialias_general,-1 -analytic_gradients,-1 -animated-image-blurs,-1 -anisotropic_hq,-1 -arccirclegap,-1 -arcofzorro,-1 -arcto,-1 -b_119394958,-1 -badpaint,-1 -bezier_conic_effects,-1 -bezier_quad_effects,-1 -beziers,-1 -big_rrect_circle_aa_effect,-1 -big_rrect_circular_corner_aa_effect,-1 -big_rrect_ellipse_aa_effect,-1 -big_rrect_elliptical_corner_aa_effect,-1 -big_rrect_rect_aa_effect,-1 -bigblurs,-1 -bigconic,-1 -bigmatrix,-1 -bigrect,-1 -bigtext,-1 -bigtileimagefilter,-1 -bitmap_subset_shader,-1 -bitmapfilters,-1 -bitmaprect_i,-1 -bitmaprect_s,-1 -bitmaptiled_fractional_horizontal,-1 -bitmaptiled_fractional_vertical,-1 -bleed,-1 -bleed_alpha_bmp,-1 -bleed_alpha_bmp_shader,-1 -bleed_alpha_image,-1 -bleed_alpha_image_shader,-1 -bleed_image,-1 -blur2rects,-1 -blur_ignore_xform_circle,-1 -blur_ignore_xform_rect,-1 -blur_ignore_xform_rrect,-1 -blur_image,-1 -blurcircles,-1 -blurimagevmask,-1 -blurquickreject,-1 -blurrects,-1 -blurredclippedcircle,-1 -blurroundrect-WH-100x100-unevenCorners,-1 -blurs,-1 -bmp_filter_quality_repeat,-1 -bug5099,-1 -bug5252,-1 -bug530095,-1 -bug615686,-1 -bug6987,-1 -c_gms,-1 -chrome_gradtext2,-1 -circle_sizes,-1 -circles,-1 -circular-clips,-1 -circular_arcs_fill,-1 -circular_arcs_hairline,-1 -circular_arcs_stroke_and_fill_butt,-1 
-circular_arcs_stroke_and_fill_round,-1 -circular_arcs_stroke_and_fill_square,-1 -circular_arcs_stroke_butt,-1 -circular_arcs_stroke_round,-1 -circular_arcs_stroke_square,-1 -circular_arcs_weird,-1 -clamped_gradients,-1 -clamped_gradients_nodither,-1 -clip_strokerect,-1 -clipcubic,-1 -clippedcubic2,-1 -clockwise,-1 -colorcomposefilter_wacky,-1 -coloremoji,-1 -combo-patheffects,-1 -complexclip2_path_aa,-1 -complexclip2_rect_aa,-1 -complexclip2_rrect_aa,-1 -complexclip2_rrect_bw,-1 -complexclip3_complex,-1 -complexclip3_simple,-1 -complexclip4_aa,-1 -complexclip_aa,-1 -complexclip_aa_invert,-1 -complexclip_aa_layer,-1 -complexclip_aa_layer_invert,-1 -complexclip_bw,-1 -complexclip_bw_invert,-1 -complexclip_bw_layer,-1 -complexclip_bw_layer_invert,-1 -composeshader_bitmap2,-1 -concavepaths,-1 -conicpaths,-1 -const_color_processor,-1 -contour_start,-1 -convex-lineonly-paths,-1 -convex-lineonly-paths-stroke-and-fill,-1 -convex-polygon-inset,-1 -convex_poly_clip,-1 -convex_poly_effect,-1 -convexpaths,-1 -crbug_640176,-1 -crbug_788500,-1 -crbug_847759,-1 -crbug_884166,-1 -crbug_887103,-1 -crbug_888453,-1 -crbug_892988,-1 -cross_context_image,-1 -cubicclosepath,-1 -cubicpath,-1 -daa,-1 -dash_line_zero_off_interval,-1 -dashcircle,-1 -dashcircle2,-1 -dashcubics,-1 -dashing,-1 -dashing2,-1 -dashing3,-1 -dashing4,-1 -dashing5_aa,-1 -dashing5_bw,-1 -degeneratesegments,-1 -dftext,-1 -dftext_blob_persp,-1 -displacement,-1 -dont_clip_to_layer,-1 -downsamplebitmap_checkerboard_high,-1 -downsamplebitmap_checkerboard_low,-1 -downsamplebitmap_checkerboard_medium,-1 -downsamplebitmap_checkerboard_none,-1 -downsamplebitmap_image_high,-1 -downsamplebitmap_image_low,-1 -downsamplebitmap_image_medium,-1 -downsamplebitmap_image_none,-1 -downsamplebitmap_text_high,-1 -downsamplebitmap_text_low,-1 -downsamplebitmap_text_medium,-1 -downsamplebitmap_text_none,-1 -draw-atlas,-1 -drawTextRSXform,-1 -draw_image_set,-1 -draw_image_set_rect_to_rect,-1 -draw_quad_set,-1 -drawable,-1 -drawbitmaprect-imagerect-subset,-1 -drawbitmaprect-subset,-1 -drawlooper,-1 -drawminibitmaprect,-1 -drawminibitmaprect_aa,-1 -drawregionmodes,-1 -dropshadowimagefilter,-1 -drrect,-1 -drrect_small_inner,-1 -dstreadshuffle,-1 -emboss,-1 -emptypath,-1 -extractbitmap,-1 -fancy_gradients,-1 -fancyblobunderline,-1 -fatpathfill,-1 -fillcircle,-1 -filltypes,-1 -filltypespersp,-1 -filterbitmap_checkerboard_192_192,-1 -filterbitmap_checkerboard_32_2,-1 -filterbitmap_checkerboard_32_32,-1 -filterbitmap_checkerboard_32_32_g8,-1 -filterbitmap_checkerboard_32_8,-1 -filterbitmap_checkerboard_4_4,-1 -filterbitmap_image_color_wheel.png,-1 -filterbitmap_image_mandrill_128.png,-1 -filterbitmap_image_mandrill_16.png,-1 -filterbitmap_image_mandrill_256.png,-1 -filterbitmap_image_mandrill_32.png,-1 -filterbitmap_image_mandrill_512.png,-1 -filterbitmap_image_mandrill_64.png,-1 -filterbitmap_image_mandrill_64.png_g8,-1 -filterbitmap_text_10.00pt,-1 -filterbitmap_text_3.00pt,-1 -filterbitmap_text_7.00pt,-1 -filterbug,-1 -filterfastbounds,-1 -filterindiabox,-1 -flippity,-1 -fontcache,-1 -fontcache-mt,-1 -fontmgr_bounds,-1 -fontmgr_bounds_0.75_0,-1 -fontmgr_bounds_1_-0.25,-1 -fontmgr_iter,-1 -fontmgr_match,-1 -fontregen,-1 -fontscaler,-1 -fontscalerdistortable,-1 -fwidth_squircle,-1 -gamma,-1 -getpostextpath,-1 -giantbitmap_clamp_bilerp_rotate,-1 -giantbitmap_clamp_bilerp_scale,-1 -giantbitmap_mirror_bilerp_rotate,-1 -giantbitmap_mirror_bilerp_scale,-1 -giantbitmap_repeat_bilerp_rotate,-1 -giantbitmap_repeat_bilerp_scale,-1 -glyph_pos_h_b,-1 -glyph_pos_h_f,-1 
-glyph_pos_h_s,-1 -glyph_pos_n_b,-1 -glyph_pos_n_f,-1 -glyph_pos_n_s,-1 -gradient_dirty_laundry,-1 -gradients,-1 -gradients4f,-1 -gradients4f_nodither,-1 -gradients_2pt_conical_edge,-1 -gradients_2pt_conical_edge_mirror,-1 -gradients_2pt_conical_edge_nodither,-1 -gradients_2pt_conical_edge_repeat,-1 -gradients_2pt_conical_inside,-1 -gradients_2pt_conical_inside_mirror,-1 -gradients_2pt_conical_inside_nodither,-1 -gradients_2pt_conical_inside_repeat,-1 -gradients_2pt_conical_outside,-1 -gradients_2pt_conical_outside_mirror,-1 -gradients_2pt_conical_outside_nodither,-1 -gradients_2pt_conical_outside_repeat,-1 -gradients_degenerate_2pt,-1 -gradients_degenerate_2pt_nodither,-1 -gradients_dup_color_stops,-1 -gradients_local_perspective,-1 -gradients_local_perspective_nodither,-1 -gradients_no_texture,-1 -gradients_no_texture_nodither,-1 -gradients_nodither,-1 -gradients_view_perspective,-1 -gradients_view_perspective_nodither,-1 -hairlines,-1 -hairmodes,-1 -hittestpath,-1 -hsl,-1 -hugebitmapshader,-1 -image-cacherator-from-picture,-1 -image-cacherator-from-raster,-1 -image-cacherator-from-texture,-1 -image-picture,-1 -image-shader,-1 -image_from_yuv_textures,-1 -image_scale_aligned,-1 -imagealphathreshold_image,-1 -imageblur,-1 -imagefilters_xfermodes,-1 -imagefiltersbase,-1 -imagefiltersclipped,-1 -imagefilterscropexpand,-1 -imagefilterscropped,-1 -imagefiltersscaled,-1 -imagefiltersstroked,-1 -imagefilterstransformed,-1 -imagemagnifier,-1 -imagemagnifier_cropped,-1 -imagemakewithfilter,-1 -imageresizetiled,-1 -imagesource,-1 -imagesrc2_low,-1 -innershapes,-1 -innershapes_bw,-1 -internal_links,-1 -inverse_paths,-1 -largecircle,-1 -lattice,-1 -lcdoverlap,-1 -lcdtext,-1 -lighting,-1 -lightingshader2,-1 -lineclosepath,-1 -linepath,-1 -localmatrixshader_nested,-1 -longlinedash,-1 -longpathdash,-1 -longwavyline,-1 -lumafilter,-1 -maddash,-1 -mandoline,-1 -manyarcs,-1 -manycircles,-1 -manyrrects,-1 -matrixconvolution,-1 -matrixconvolution_color,-1 -matriximagefilter,-1 -mipmap,-1 -mixedtextblobs,-1 -morphology,-1 -nested_aa,-1 -nested_flipY_aa,-1 -nested_flipY_bw,-1 -new_texture_image,-1 -ninepatch-stretch,-1 -nonclosedpaths,-1 -ovals,-1 -p3_ovals,-1 -parsedpaths,-1 -patch_image,-1 -path-reverse,-1 -path_huge_crbug_800804,-1 -path_mask_cache,-1 -patheffect,-1 -pathfill,-1 -pathinterior,-1 -pathinvfill,-1 -pathopsinverse,-1 -pathopsskpclip,-1 -pdf_never_embed,-1 -persp_images,-1 -persp_shaders_aa,-1 -persp_shaders_bw,-1 -pictureimagefilter,-1 -pictureshader,-1 -pictureshader_localwrapper,-1 -pixel_snap_combo,-1 -pixel_snap_line,-1 -pixel_snap_point,-1 -pixel_snap_rect,-1 -points,-1 -poly2poly,-1 -polygons,-1 -quadcap,-1 -quadclosepath,-1 -quadpath,-1 -radial_gradient4,-1 -radial_gradient4_nodither,-1 -rectangle_texture,-1 -rects,-1 -rects_as_paths,-1 -repeated_bitmap_jpg,-1 -resizeimagefilter,-1 -rotate_imagefilter,-1 -rotatedcubicpath,-1 -roundrects,-1 -rrect,-1 -rrect_clip_aa,-1 -rrect_clip_draw_paint,-1 -rrect_draw_aa,-1 -rrect_effect,-1 -save_behind,-1 -savelayer_clipmask,-1 -savelayer_coverage,-1 -savelayer_initfromprev,-1 -savelayer_maskfilter,-1 -savelayer_with_backdrop,-1 -scaled_tilemodes_npot,-1 -scaledemoji_rendering,-1 -scaledstrokes,-1 -shadermaskfilter_gradient,-1 -shadermaskfilter_image,-1 -shadermaskfilter_localmatrix,-1 -shadertext3,-1 -shadow_utils,-1 -shadow_utils_gray,-1 -shadow_utils_occl,-1 -shadows,-1 -sharedcorners,-1 -simple-magnification,-1 -simple-offsetimagefilter,-1 -simple-polygon-offset,-1 -simpleaaclip_path,-1 -simpleaaclip_rect,-1 -simpleblurroundrect,-1 
-simplerect,-1 -simpleshapes,-1 -skbug_257,-1 -skbug_4868,-1 -skbug_8664,-1 -skinning,-1 -skinning_cached,-1 -skinning_cpu,-1 -skinning_cpu_cached,-1 -smallarc,-1 -smallpaths,-1 -squarehair,-1 -stlouisarch,-1 -stringart,-1 -stroke-fill,-1 -stroke_rect_shader,-1 -strokecircle,-1 -strokedlines,-1 -strokerect,-1 -strokerects,-1 -strokes3,-1 -strokes_poly,-1 -strokes_round,-1 -stroketext,-1 -sweep_tiling,-1 -tall_stretched_bitmaps,-1 -teenyStrokes,-1 -testgradient,-1 -text_scale_skew,-1 -textblob,-1 -textblobgeometrychange,-1 -textbloblooper,-1 -textblobmixedsizes,-1 -textblobmixedsizes_df,-1 -textblobrandomfont,-1 -textblobshader,-1 -textblobtransforms,-1 -textblobuseaftergpufree,-1 -texture_domain_effect,-1 -texture_domain_effect_bilerp,-1 -texture_domain_effect_mipmap,-1 -thinconcavepaths,-1 -thinrects,-1 -thinstrokedrects,-1 -tiledscaledbitmap,-1 -tileimagefilter,-1 -tilemode_decal,-1 -tilemode_gradient,-1 -tilemodes,-1 -tilemodes_npot,-1 -tinyanglearcs,-1 -trickycubicstrokes,-1 -trimpatheffect,-1 -typefacerendering,-1 -typefacerendering_pfa,-1 -typefacerendering_pfb,-1 -typefacestyles_kerning,-1 -varied_text_clipped_lcd,-1 -varied_text_clipped_no_lcd,-1 -varied_text_ignorable_clip_lcd,-1 -varied_text_ignorable_clip_no_lcd,-1 -vertices,-1 -vertices_batching,-1 -vertices_scaled_shader,-1 -verylarge_picture_image,-1 -verylargebitmap,-1 -windowrectangles,-1 -windowrectangles_mask,-1 -xfermodeimagefilter,-1 -xfermodes,-1 -yuv_nv12_to_rgb_effect,-1 -yuv_to_rgb_effect,-1 -zeroPath,-1 -zero_control_stroke,-1 -zero_length_paths_bw,-1 -zero_length_paths_dbl_aa,-1 -zero_length_paths_dbl_bw,-1 -zerolinestroke,-1 -windowrectangles_mask,-1 -s_BlurDrawImage,-1 -s_CubicStroke,-1 -s_OverStroke,-1 -s_aaclip,-1 -s_aarectmodes,-1 -s_aaxfermodes,-1 -s_addarc,-1 -s_addarc_meas,-1 -s_analytic_antialias_convex,-1 -s_analytic_antialias_general,-1 -s_analytic_gradients,-1 -s_animated-image-blurs,-1 -s_anisotropic_hq,-1 -s_arccirclegap,-1 -s_arcofzorro,-1 -s_arcto,-1 -s_b_119394958,-1 -s_badpaint,-1 -s_bezier_conic_effects,-1 -s_bezier_quad_effects,-1 -s_beziers,-1 -s_big_rrect_circle_aa_effect,-1 -s_big_rrect_circular_corner_aa_effect,-1 -s_big_rrect_ellipse_aa_effect,-1 -s_big_rrect_elliptical_corner_aa_effect,-1 -s_big_rrect_rect_aa_effect,-1 -s_bigblurs,-1 -s_bigconic,-1 -s_bigmatrix,-1 -s_bigrect,-1 -s_bigtext,-1 -s_bigtileimagefilter,-1 -s_bitmap_subset_shader,-1 -s_bitmapfilters,-1 -s_bitmaprect_i,-1 -s_bitmaprect_s,-1 -s_bitmaptiled_fractional_horizontal,-1 -s_bitmaptiled_fractional_vertical,-1 -s_bleed,-1 -s_bleed_alpha_bmp,-1 -s_bleed_alpha_bmp_shader,-1 -s_bleed_alpha_image,-1 -s_bleed_alpha_image_shader,-1 -s_bleed_image,-1 -s_blur2rects,-1 -s_blur_ignore_xform_circle,-1 -s_blur_ignore_xform_rect,-1 -s_blur_ignore_xform_rrect,-1 -s_blur_image,-1 -s_blurcircles,-1 -s_blurimagevmask,-1 -s_blurquickreject,-1 -s_blurrects,-1 -s_blurredclippedcircle,-1 -s_blurroundrect-WH-100x100-unevenCorners,-1 -s_blurs,-1 -s_bmp_filter_quality_repeat,-1 -s_bug5099,-1 -s_bug5252,-1 -s_bug530095,-1 -s_bug615686,-1 -s_bug6987,-1 -s_c_gms,-1 -s_chrome_gradtext2,-1 -s_circle_sizes,-1 -s_circles,-1 -s_circular-clips,-1 -s_circular_arcs_fill,-1 -s_circular_arcs_hairline,-1 -s_circular_arcs_stroke_and_fill_butt,-1 -s_circular_arcs_stroke_and_fill_round,-1 -s_circular_arcs_stroke_and_fill_square,-1 -s_circular_arcs_stroke_butt,-1 -s_circular_arcs_stroke_round,-1 -s_circular_arcs_stroke_square,-1 -s_circular_arcs_weird,-1 -s_clamped_gradients,-1 -s_clamped_gradients_nodither,-1 -s_clip_strokerect,-1 -s_clipcubic,-1 
-s_clippedcubic2,-1 -s_clockwise,-1 -s_colorcomposefilter_wacky,-1 -s_coloremoji,-1 -s_combo-patheffects,-1 -s_complexclip2_path_aa,-1 -s_complexclip2_rect_aa,-1 -s_complexclip2_rrect_aa,-1 -s_complexclip2_rrect_bw,-1 -s_complexclip3_complex,-1 -s_complexclip3_simple,-1 -s_complexclip4_aa,-1 -s_complexclip_aa,-1 -s_complexclip_aa_invert,-1 -s_complexclip_aa_layer,-1 -s_complexclip_aa_layer_invert,-1 -s_complexclip_bw,-1 -s_complexclip_bw_invert,-1 -s_complexclip_bw_layer,-1 -s_complexclip_bw_layer_invert,-1 -s_composeshader_bitmap2,-1 -s_concavepaths,-1 -s_conicpaths,-1 -s_const_color_processor,-1 -s_contour_start,-1 -s_convex-lineonly-paths,-1 -s_convex-lineonly-paths-stroke-and-fill,-1 -s_convex-polygon-inset,-1 -s_convex_poly_clip,-1 -s_convex_poly_effect,-1 -s_convexpaths,-1 -s_crbug_640176,-1 -s_crbug_788500,-1 -s_crbug_847759,-1 -s_crbug_884166,-1 -s_crbug_887103,-1 -s_crbug_888453,-1 -s_crbug_892988,-1 -s_cross_context_image,-1 -s_cubicclosepath,-1 -s_cubicpath,-1 -s_daa,-1 -s_dash_line_zero_off_interval,-1 -s_dashcircle,-1 -s_dashcircle2,-1 -s_dashcubics,-1 -s_dashing,-1 -s_dashing2,-1 -s_dashing3,-1 -s_dashing4,-1 -s_dashing5_aa,-1 -s_dashing5_bw,-1 -s_degeneratesegments,-1 -s_dftext,-1 -s_dftext_blob_persp,-1 -s_displacement,-1 -s_dont_clip_to_layer,-1 -s_downsamplebitmap_checkerboard_high,-1 -s_downsamplebitmap_checkerboard_low,-1 -s_downsamplebitmap_checkerboard_medium,-1 -s_downsamplebitmap_checkerboard_none,-1 -s_downsamplebitmap_image_high,-1 -s_downsamplebitmap_image_low,-1 -s_downsamplebitmap_image_medium,-1 -s_downsamplebitmap_image_none,-1 -s_downsamplebitmap_text_high,-1 -s_downsamplebitmap_text_low,-1 -s_downsamplebitmap_text_medium,-1 -s_downsamplebitmap_text_none,-1 -s_draw-atlas,-1 -s_drawTextRSXform,-1 -s_draw_image_set,-1 -s_draw_image_set_rect_to_rect,-1 -s_draw_quad_set,-1 -s_drawable,-1 -s_drawbitmaprect-imagerect-subset,-1 -s_drawbitmaprect-subset,-1 -s_drawlooper,-1 -s_drawminibitmaprect,-1 -s_drawminibitmaprect_aa,-1 -s_drawregionmodes,-1 -s_dropshadowimagefilter,-1 -s_drrect,-1 -s_drrect_small_inner,-1 -s_dstreadshuffle,-1 -s_emboss,-1 -s_emptypath,-1 -s_extractbitmap,-1 -s_fancy_gradients,-1 -s_fancyblobunderline,-1 -s_fatpathfill,-1 -s_fillcircle,-1 -s_filltypes,-1 -s_filltypespersp,-1 -s_filterbitmap_checkerboard_192_192,-1 -s_filterbitmap_checkerboard_32_2,-1 -s_filterbitmap_checkerboard_32_32,-1 -s_filterbitmap_checkerboard_32_32_g8,-1 -s_filterbitmap_checkerboard_32_8,-1 -s_filterbitmap_checkerboard_4_4,-1 -s_filterbitmap_image_color_wheel.png,-1 -s_filterbitmap_image_mandrill_128.png,-1 -s_filterbitmap_image_mandrill_16.png,-1 -s_filterbitmap_image_mandrill_256.png,-1 -s_filterbitmap_image_mandrill_32.png,-1 -s_filterbitmap_image_mandrill_512.png,-1 -s_filterbitmap_image_mandrill_64.png,-1 -s_filterbitmap_image_mandrill_64.png_g8,-1 -s_filterbitmap_text_10.00pt,-1 -s_filterbitmap_text_3.00pt,-1 -s_filterbitmap_text_7.00pt,-1 -s_filterbug,-1 -s_filterfastbounds,-1 -s_filterindiabox,-1 -s_flippity,-1 -s_fontcache,-1 -s_fontcache-mt,-1 -s_fontmgr_bounds,-1 -s_fontmgr_bounds_0.75_0,-1 -s_fontmgr_bounds_1_-0.25,-1 -s_fontmgr_iter,-1 -s_fontmgr_match,-1 -s_fontregen,-1 -s_fontscaler,-1 -s_fontscalerdistortable,-1 -s_fwidth_squircle,-1 -s_gamma,-1 -s_getpostextpath,-1 -s_giantbitmap_clamp_bilerp_rotate,-1 -s_giantbitmap_clamp_bilerp_scale,-1 -s_giantbitmap_mirror_bilerp_rotate,-1 -s_giantbitmap_mirror_bilerp_scale,-1 -s_giantbitmap_repeat_bilerp_rotate,-1 -s_giantbitmap_repeat_bilerp_scale,-1 -s_glyph_pos_h_b,-1 -s_glyph_pos_h_f,-1 -s_glyph_pos_h_s,-1 
-s_glyph_pos_n_b,-1 -s_glyph_pos_n_f,-1 -s_glyph_pos_n_s,-1 -s_gradient_dirty_laundry,-1 -s_gradients,-1 -s_gradients4f,-1 -s_gradients4f_nodither,-1 -s_gradients_2pt_conical_edge,-1 -s_gradients_2pt_conical_edge_mirror,-1 -s_gradients_2pt_conical_edge_nodither,-1 -s_gradients_2pt_conical_edge_repeat,-1 -s_gradients_2pt_conical_inside,-1 -s_gradients_2pt_conical_inside_mirror,-1 -s_gradients_2pt_conical_inside_nodither,-1 -s_gradients_2pt_conical_inside_repeat,-1 -s_gradients_2pt_conical_outside,-1 -s_gradients_2pt_conical_outside_mirror,-1 -s_gradients_2pt_conical_outside_nodither,-1 -s_gradients_2pt_conical_outside_repeat,-1 -s_gradients_degenerate_2pt,-1 -s_gradients_degenerate_2pt_nodither,-1 -s_gradients_dup_color_stops,-1 -s_gradients_local_perspective,-1 -s_gradients_local_perspective_nodither,-1 -s_gradients_no_texture,-1 -s_gradients_no_texture_nodither,-1 -s_gradients_nodither,-1 -s_gradients_view_perspective,-1 -s_gradients_view_perspective_nodither,-1 -s_hairlines,-1 -s_hairmodes,-1 -s_hittestpath,-1 -s_hsl,-1 -s_hugebitmapshader,-1 -s_image-cacherator-from-picture,-1 -s_image-cacherator-from-raster,-1 -s_image-cacherator-from-texture,-1 -s_image-picture,-1 -s_image-shader,-1 -s_image_from_yuv_textures,-1 -s_image_scale_aligned,-1 -s_imagealphathreshold_image,-1 -s_imageblur,-1 -s_imagefilters_xfermodes,-1 -s_imagefiltersbase,-1 -s_imagefiltersclipped,-1 -s_imagefilterscropexpand,-1 -s_imagefilterscropped,-1 -s_imagefiltersscaled,-1 -s_imagefiltersstroked,-1 -s_imagefilterstransformed,-1 -s_imagemagnifier,-1 -s_imagemagnifier_cropped,-1 -s_imagemakewithfilter,-1 -s_imageresizetiled,-1 -s_imagesource,-1 -s_imagesrc2_low,-1 -s_innershapes,-1 -s_innershapes_bw,-1 -s_internal_links,-1 -s_inverse_paths,-1 -s_largecircle,-1 -s_lattice,-1 -s_lcdoverlap,-1 -s_lcdtext,-1 -s_lighting,-1 -s_lightingshader2,-1 -s_lineclosepath,-1 -s_linepath,-1 -s_localmatrixshader_nested,-1 -s_longlinedash,-1 -s_longpathdash,-1 -s_longwavyline,-1 -s_lumafilter,-1 -s_maddash,-1 -s_mandoline,-1 -s_manyarcs,-1 -s_manycircles,-1 -s_manyrrects,-1 -s_matrixconvolution,-1 -s_matrixconvolution_color,-1 -s_matriximagefilter,-1 -s_mipmap,-1 -s_mixedtextblobs,-1 -s_morphology,-1 -s_nested_aa,-1 -s_nested_flipY_aa,-1 -s_nested_flipY_bw,-1 -s_new_texture_image,-1 -s_ninepatch-stretch,-1 -s_nonclosedpaths,-1 -s_ovals,-1 -s_p3_ovals,-1 -s_parsedpaths,-1 -s_patch_image,-1 -s_path-reverse,-1 -s_path_huge_crbug_800804,-1 -s_path_mask_cache,-1 -s_patheffect,-1 -s_pathfill,-1 -s_pathinterior,-1 -s_pathinvfill,-1 -s_pathopsinverse,-1 -s_pathopsskpclip,-1 -s_pdf_never_embed,-1 -s_persp_images,-1 -s_persp_shaders_aa,-1 -s_persp_shaders_bw,-1 -s_pictureimagefilter,-1 -s_pictureshader,-1 -s_pictureshader_localwrapper,-1 -s_pixel_snap_combo,-1 -s_pixel_snap_line,-1 -s_pixel_snap_point,-1 -s_pixel_snap_rect,-1 -s_points,-1 -s_poly2poly,-1 -s_polygons,-1 -s_quadcap,-1 -s_quadclosepath,-1 -s_quadpath,-1 -s_radial_gradient4,-1 -s_radial_gradient4_nodither,-1 -s_rectangle_texture,-1 -s_rects,-1 -s_rects_as_paths,-1 -s_repeated_bitmap_jpg,-1 -s_resizeimagefilter,-1 -s_rotate_imagefilter,-1 -s_rotatedcubicpath,-1 -s_roundrects,-1 -s_rrect,-1 -s_rrect_clip_aa,-1 -s_rrect_clip_draw_paint,-1 -s_rrect_draw_aa,-1 -s_rrect_effect,-1 -s_save_behind,-1 -s_savelayer_clipmask,-1 -s_savelayer_coverage,-1 -s_savelayer_initfromprev,-1 -s_savelayer_maskfilter,-1 -s_savelayer_with_backdrop,-1 -s_scaled_tilemodes_npot,-1 -s_scaledemoji_rendering,-1 -s_scaledstrokes,-1 -s_shadermaskfilter_gradient,-1 -s_shadermaskfilter_image,-1 
-s_shadermaskfilter_localmatrix,-1 -s_shadertext3,-1 -s_shadow_utils,-1 -s_shadow_utils_gray,-1 -s_shadow_utils_occl,-1 -s_shadows,-1 -s_sharedcorners,-1 -s_simple-magnification,-1 -s_simple-offsetimagefilter,-1 -s_simple-polygon-offset,-1 -s_simpleaaclip_path,-1 -s_simpleaaclip_rect,-1 -s_simpleblurroundrect,-1 -s_simplerect,-1 -s_simpleshapes,-1 -s_skbug_257,-1 -s_skbug_4868,-1 -s_skbug_8664,-1 -s_skinning,-1 -s_skinning_cached,-1 -s_skinning_cpu,-1 -s_skinning_cpu_cached,-1 -s_smallarc,-1 -s_smallpaths,-1 -s_squarehair,-1 -s_stlouisarch,-1 -s_stringart,-1 -s_stroke-fill,-1 -s_stroke_rect_shader,-1 -s_strokecircle,-1 -s_strokedlines,-1 -s_strokerect,-1 -s_strokerects,-1 -s_strokes3,-1 -s_strokes_poly,-1 -s_strokes_round,-1 -s_stroketext,-1 -s_sweep_tiling,-1 -s_tall_stretched_bitmaps,-1 -s_teenyStrokes,-1 -s_testgradient,-1 -s_text_scale_skew,-1 -s_textblob,-1 -s_textblobgeometrychange,-1 -s_textbloblooper,-1 -s_textblobmixedsizes,-1 -s_textblobmixedsizes_df,-1 -s_textblobrandomfont,-1 -s_textblobshader,-1 -s_textblobtransforms,-1 -s_textblobuseaftergpufree,-1 -s_texture_domain_effect,-1 -s_texture_domain_effect_bilerp,-1 -s_texture_domain_effect_mipmap,-1 -s_thinconcavepaths,-1 -s_thinrects,-1 -s_thinstrokedrects,-1 -s_tiledscaledbitmap,-1 -s_tileimagefilter,-1 -s_tilemode_decal,-1 -s_tilemode_gradient,-1 -s_tilemodes,-1 -s_tilemodes_npot,-1 -s_tinyanglearcs,-1 -s_trickycubicstrokes,-1 -s_trimpatheffect,-1 -s_typefacerendering,-1 -s_typefacerendering_pfa,-1 -s_typefacerendering_pfb,-1 -s_typefacestyles_kerning,-1 -s_varied_text_clipped_lcd,-1 -s_varied_text_clipped_no_lcd,-1 -s_varied_text_ignorable_clip_lcd,-1 -s_varied_text_ignorable_clip_no_lcd,-1 -s_vertices,-1 -s_vertices_batching,-1 -s_vertices_scaled_shader,-1 -s_verylarge_picture_image,-1 -s_verylargebitmap,-1 -s_windowrectangles,-1 -s_windowrectangles_mask,-1 -s_xfermodeimagefilter,-1 -s_xfermodes,-1 -s_yuv_nv12_to_rgb_effect,-1 -s_yuv_to_rgb_effect,-1 -s_zeroPath,-1 -s_zero_control_stroke,-1 -s_zero_length_paths_bw,-1 -s_zero_length_paths_dbl_aa,-1 -s_zero_length_paths_dbl_bw,-1 -s_zerolinestroke,-1 -windowrectangles_mask,-1 -BlurDrawImage,-1 -CubicStroke,-1 -OverStroke,-1 -aaclip,-1 -aarectmodes,-1 -aaxfermodes,-1 -addarc,-1 -addarc_meas,-1 -analytic_antialias_convex,-1 -analytic_antialias_general,-1 -analytic_gradients,-1 -animated-image-blurs,-1 -anisotropic_hq,-1 -arccirclegap,-1 -arcofzorro,-1 -arcto,-1 -b_119394958,-1 -badpaint,-1 -bezier_conic_effects,-1 -bezier_quad_effects,-1 -beziers,-1 -big_rrect_circle_aa_effect,-1 -big_rrect_circular_corner_aa_effect,-1 -big_rrect_ellipse_aa_effect,-1 -big_rrect_elliptical_corner_aa_effect,-1 -big_rrect_rect_aa_effect,-1 -bigblurs,-1 -bigconic,-1 -bigmatrix,-1 -bigrect,-1 -bigtext,-1 -bigtileimagefilter,-1 -bitmap_subset_shader,-1 -bitmapfilters,-1 -bitmaprect_i,-1 -bitmaprect_s,-1 -bitmaptiled_fractional_horizontal,-1 -bitmaptiled_fractional_vertical,-1 -bleed,-1 -bleed_alpha_bmp,-1 -bleed_alpha_bmp_shader,-1 -bleed_alpha_image,-1 -bleed_alpha_image_shader,-1 -bleed_image,-1 -blur2rects,-1 -blur_ignore_xform_circle,-1 -blur_ignore_xform_rect,-1 -blur_ignore_xform_rrect,-1 -blur_image,-1 -blurcircles,-1 -blurimagevmask,-1 -blurquickreject,-1 -blurrects,-1 -blurredclippedcircle,-1 -blurroundrect-WH-100x100-unevenCorners,-1 -blurs,-1 -bmp_filter_quality_repeat,-1 -bug5099,-1 -bug5252,-1 -bug530095,-1 -bug615686,-1 -bug6987,-1 -c_gms,-1 -chrome_gradtext2,-1 -circle_sizes,-1 -circles,-1 -circular-clips,-1 -circular_arcs_fill,-1 -circular_arcs_hairline,-1 
-circular_arcs_stroke_and_fill_butt,-1 -circular_arcs_stroke_and_fill_round,-1 -circular_arcs_stroke_and_fill_square,-1 -circular_arcs_stroke_butt,-1 -circular_arcs_stroke_round,-1 -circular_arcs_stroke_square,-1 -circular_arcs_weird,-1 -clamped_gradients,-1 -clamped_gradients_nodither,-1 -clip_strokerect,-1 -clipcubic,-1 -clippedcubic2,-1 -clockwise,-1 -colorcomposefilter_wacky,-1 -coloremoji,-1 -combo-patheffects,-1 -complexclip2_path_aa,-1 -complexclip2_rect_aa,-1 -complexclip2_rrect_aa,-1 -complexclip2_rrect_bw,-1 -complexclip3_complex,-1 -complexclip3_simple,-1 -complexclip4_aa,-1 -complexclip_aa,-1 -complexclip_aa_invert,-1 -complexclip_aa_layer,-1 -complexclip_aa_layer_invert,-1 -complexclip_bw,-1 -complexclip_bw_invert,-1 -complexclip_bw_layer,-1 -complexclip_bw_layer_invert,-1 -composeshader_bitmap2,-1 -concavepaths,-1 -conicpaths,-1 -const_color_processor,-1 -contour_start,-1 -convex-lineonly-paths,-1 -convex-lineonly-paths-stroke-and-fill,-1 -convex-polygon-inset,-1 -convex_poly_clip,-1 -convex_poly_effect,-1 -convexpaths,-1 -crbug_640176,-1 -crbug_788500,-1 -crbug_847759,-1 -crbug_884166,-1 -crbug_887103,-1 -crbug_888453,-1 -crbug_892988,-1 -cross_context_image,-1 -cubicclosepath,-1 -cubicpath,-1 -daa,-1 -dash_line_zero_off_interval,-1 -dashcircle,-1 -dashcircle2,-1 -dashcubics,-1 -dashing,-1 -dashing2,-1 -dashing3,-1 -dashing4,-1 -dashing5_aa,-1 -dashing5_bw,-1 -degeneratesegments,-1 -dftext,-1 -dftext_blob_persp,-1 -displacement,-1 -dont_clip_to_layer,-1 -downsamplebitmap_checkerboard_high,-1 -downsamplebitmap_checkerboard_low,-1 -downsamplebitmap_checkerboard_medium,-1 -downsamplebitmap_checkerboard_none,-1 -downsamplebitmap_image_high,-1 -downsamplebitmap_image_low,-1 -downsamplebitmap_image_medium,-1 -downsamplebitmap_image_none,-1 -downsamplebitmap_text_high,-1 -downsamplebitmap_text_low,-1 -downsamplebitmap_text_medium,-1 -downsamplebitmap_text_none,-1 -draw-atlas,-1 -drawTextRSXform,-1 -draw_image_set,-1 -draw_image_set_rect_to_rect,-1 -draw_quad_set,-1 -drawable,-1 -drawbitmaprect-imagerect-subset,-1 -drawbitmaprect-subset,-1 -drawlooper,-1 -drawminibitmaprect,-1 -drawminibitmaprect_aa,-1 -drawregionmodes,-1 -dropshadowimagefilter,-1 -drrect,-1 -drrect_small_inner,-1 -dstreadshuffle,-1 -emboss,-1 -emptypath,-1 -extractbitmap,-1 -fancy_gradients,-1 -fancyblobunderline,-1 -fatpathfill,-1 -fillcircle,-1 -filltypes,-1 -filltypespersp,-1 -filterbitmap_checkerboard_192_192,-1 -filterbitmap_checkerboard_32_2,-1 -filterbitmap_checkerboard_32_32,-1 -filterbitmap_checkerboard_32_32_g8,-1 -filterbitmap_checkerboard_32_8,-1 -filterbitmap_checkerboard_4_4,-1 -filterbitmap_image_color_wheel.png,-1 -filterbitmap_image_mandrill_128.png,-1 -filterbitmap_image_mandrill_16.png,-1 -filterbitmap_image_mandrill_256.png,-1 -filterbitmap_image_mandrill_32.png,-1 -filterbitmap_image_mandrill_512.png,-1 -filterbitmap_image_mandrill_64.png,-1 -filterbitmap_image_mandrill_64.png_g8,-1 -filterbitmap_text_10.00pt,-1 -filterbitmap_text_3.00pt,-1 -filterbitmap_text_7.00pt,-1 -filterbug,-1 -filterfastbounds,-1 -filterindiabox,-1 -flippity,-1 -fontcache,-1 -fontcache-mt,-1 -fontmgr_bounds,-1 -fontmgr_bounds_0.75_0,-1 -fontmgr_bounds_1_-0.25,-1 -fontmgr_iter,-1 -fontmgr_match,-1 -fontregen,-1 -fontscaler,-1 -fontscalerdistortable,-1 -fwidth_squircle,-1 -gamma,-1 -getpostextpath,-1 -giantbitmap_clamp_bilerp_rotate,-1 -giantbitmap_clamp_bilerp_scale,-1 -giantbitmap_mirror_bilerp_rotate,-1 -giantbitmap_mirror_bilerp_scale,-1 -giantbitmap_repeat_bilerp_rotate,-1 -giantbitmap_repeat_bilerp_scale,-1 
-glyph_pos_h_b,-1 -glyph_pos_h_f,-1 -glyph_pos_h_s,-1 -glyph_pos_n_b,-1 -glyph_pos_n_f,-1 -glyph_pos_n_s,-1 -gradient_dirty_laundry,-1 -gradients,-1 -gradients4f,-1 -gradients4f_nodither,-1 -gradients_2pt_conical_edge,-1 -gradients_2pt_conical_edge_mirror,-1 -gradients_2pt_conical_edge_nodither,-1 -gradients_2pt_conical_edge_repeat,-1 -gradients_2pt_conical_inside,-1 -gradients_2pt_conical_inside_mirror,-1 -gradients_2pt_conical_inside_nodither,-1 -gradients_2pt_conical_inside_repeat,-1 -gradients_2pt_conical_outside,-1 -gradients_2pt_conical_outside_mirror,-1 -gradients_2pt_conical_outside_nodither,-1 -gradients_2pt_conical_outside_repeat,-1 -gradients_degenerate_2pt,-1 -gradients_degenerate_2pt_nodither,-1 -gradients_dup_color_stops,-1 -gradients_local_perspective,-1 -gradients_local_perspective_nodither,-1 -gradients_no_texture,-1 -gradients_no_texture_nodither,-1 -gradients_nodither,-1 -gradients_view_perspective,-1 -gradients_view_perspective_nodither,-1 -hairlines,-1 -hairmodes,-1 -hittestpath,-1 -hsl,-1 -hugebitmapshader,-1 -image-cacherator-from-picture,-1 -image-cacherator-from-raster,-1 -image-cacherator-from-texture,-1 -image-picture,-1 -image-shader,-1 -image_from_yuv_textures,-1 -image_scale_aligned,-1 -imagealphathreshold_image,-1 -imageblur,-1 -imagefilters_xfermodes,-1 -imagefiltersbase,-1 -imagefiltersclipped,-1 -imagefilterscropexpand,-1 -imagefilterscropped,-1 -imagefiltersscaled,-1 -imagefiltersstroked,-1 -imagefilterstransformed,-1 -imagemagnifier,-1 -imagemagnifier_cropped,-1 -imagemakewithfilter,-1 -imageresizetiled,-1 -imagesource,-1 -imagesrc2_low,-1 -innershapes,-1 -innershapes_bw,-1 -internal_links,-1 -inverse_paths,-1 -largecircle,-1 -lattice,-1 -lcdoverlap,-1 -lcdtext,-1 -lighting,-1 -lightingshader2,-1 -lineclosepath,-1 -linepath,-1 -localmatrixshader_nested,-1 -longlinedash,-1 -longpathdash,-1 -longwavyline,-1 -lumafilter,-1 -maddash,-1 -mandoline,-1 -manyarcs,-1 -manycircles,-1 -manyrrects,-1 -matrixconvolution,-1 -matrixconvolution_color,-1 -matriximagefilter,-1 -mipmap,-1 -mixedtextblobs,-1 -morphology,-1 -nested_aa,-1 -nested_flipY_aa,-1 -nested_flipY_bw,-1 -new_texture_image,-1 -ninepatch-stretch,-1 -nonclosedpaths,-1 -ovals,-1 -p3_ovals,-1 -parsedpaths,-1 -patch_image,-1 -path-reverse,-1 -path_huge_crbug_800804,-1 -path_mask_cache,-1 -patheffect,-1 -pathfill,-1 -pathinterior,-1 -pathinvfill,-1 -pathopsinverse,-1 -pathopsskpclip,-1 -pdf_never_embed,-1 -persp_images,-1 -persp_shaders_aa,-1 -persp_shaders_bw,-1 -pictureimagefilter,-1 -pictureshader,-1 -pictureshader_localwrapper,-1 -pixel_snap_combo,-1 -pixel_snap_line,-1 -pixel_snap_point,-1 -pixel_snap_rect,-1 -points,-1 -poly2poly,-1 -polygons,-1 -quadcap,-1 -quadclosepath,-1 -quadpath,-1 -radial_gradient4,-1 -radial_gradient4_nodither,-1 -rectangle_texture,-1 -rects,-1 -rects_as_paths,-1 -repeated_bitmap_jpg,-1 -resizeimagefilter,-1 -rotate_imagefilter,-1 -rotatedcubicpath,-1 -roundrects,-1 -rrect,-1 -rrect_clip_aa,-1 -rrect_clip_draw_paint,-1 -rrect_draw_aa,-1 -rrect_effect,-1 -save_behind,-1 -savelayer_clipmask,-1 -savelayer_coverage,-1 -savelayer_initfromprev,-1 -savelayer_maskfilter,-1 -savelayer_with_backdrop,-1 -scaled_tilemodes_npot,-1 -scaledemoji_rendering,-1 -scaledstrokes,-1 -shadermaskfilter_gradient,-1 -shadermaskfilter_image,-1 -shadermaskfilter_localmatrix,-1 -shadertext3,-1 -shadow_utils,-1 -shadow_utils_gray,-1 -shadow_utils_occl,-1 -shadows,-1 -sharedcorners,-1 -simple-magnification,-1 -simple-offsetimagefilter,-1 -simple-polygon-offset,-1 -simpleaaclip_path,-1 -simpleaaclip_rect,-1 
-simpleblurroundrect,-1 -simplerect,-1 -simpleshapes,-1 -skbug_257,-1 -skbug_4868,-1 -skbug_8664,-1 -skinning,-1 -skinning_cached,-1 -skinning_cpu,-1 -skinning_cpu_cached,-1 -smallarc,-1 -smallpaths,-1 -squarehair,-1 -stlouisarch,-1 -stringart,-1 -stroke-fill,-1 -stroke_rect_shader,-1 -strokecircle,-1 -strokedlines,-1 -strokerect,-1 -strokerects,-1 -strokes3,-1 -strokes_poly,-1 -strokes_round,-1 -stroketext,-1 -sweep_tiling,-1 -tall_stretched_bitmaps,-1 -teenyStrokes,-1 -testgradient,-1 -text_scale_skew,-1 -textblob,-1 -textblobgeometrychange,-1 -textbloblooper,-1 -textblobmixedsizes,-1 -textblobmixedsizes_df,-1 -textblobrandomfont,-1 -textblobshader,-1 -textblobtransforms,-1 -textblobuseaftergpufree,-1 -texture_domain_effect,-1 -texture_domain_effect_bilerp,-1 -texture_domain_effect_mipmap,-1 -thinconcavepaths,-1 -thinrects,-1 -thinstrokedrects,-1 -tiledscaledbitmap,-1 -tileimagefilter,-1 -tilemode_decal,-1 -tilemode_gradient,-1 -tilemodes,-1 -tilemodes_npot,-1 -tinyanglearcs,-1 -trickycubicstrokes,-1 -trimpatheffect,-1 -typefacerendering,-1 -typefacerendering_pfa,-1 -typefacerendering_pfb,-1 -typefacestyles_kerning,-1 -varied_text_clipped_lcd,-1 -varied_text_clipped_no_lcd,-1 -varied_text_ignorable_clip_lcd,-1 -varied_text_ignorable_clip_no_lcd,-1 -vertices,-1 -vertices_batching,-1 -vertices_scaled_shader,-1 -verylarge_picture_image,-1 -verylargebitmap,-1 -windowrectangles,-1 -windowrectangles_mask,-1 -xfermodeimagefilter,-1 -xfermodes,-1 -yuv_nv12_to_rgb_effect,-1 -yuv_to_rgb_effect,-1 -zeroPath,-1 -zero_control_stroke,-1 -zero_length_paths_bw,-1 -zero_length_paths_dbl_aa,-1 -zero_length_paths_dbl_bw,-1 -zerolinestroke,-1 diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-vk_rendertests-flakes.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-vk_rendertests-flakes.txt deleted file mode 100644 index 601050e5b..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp-vk_rendertests-flakes.txt +++ /dev/null @@ -1,12 +0,0 @@ -# GPU crash -# https://gitlab.freedesktop.org/gallo/mesa/-/jobs/26385169#L4459 -convex-lineonly-paths - -# The following test group pass on the first 3 runs, but may flake eventually. -# This is an assumption, since those ones were set as failed by Android CTS -blur2rectsnonninepatch -bug339297_as_clip -bug6083 -cliperror -dashtextcaps -largeglyphblur diff --git a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp_unittests-fails.txt b/lib/mesa/src/freedreno/ci/freedreno-a630-skqp_unittests-fails.txt deleted file mode 100644 index 13926b75f..000000000 --- a/lib/mesa/src/freedreno/ci/freedreno-a630-skqp_unittests-fails.txt +++ /dev/null @@ -1 +0,0 @@ -SRGBReadWritePixels diff --git a/lib/mesa/src/freedreno/computerator/a4xx.c b/lib/mesa/src/freedreno/computerator/a4xx.c deleted file mode 100644 index 0dbb30712..000000000 --- a/lib/mesa/src/freedreno/computerator/a4xx.c +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Copyright © 2021 Ilia Mirkin - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ir3/ir3_compiler.h" - -#include "util/u_math.h" -#include "util/u_queue.h" -#include "util/half_float.h" - -#include "adreno_pm4.xml.h" -#include "adreno_common.xml.h" -#include "a4xx.xml.h" - -#include "ir3_asm.h" -#include "main.h" - -struct a4xx_backend { - struct backend base; - - struct ir3_compiler *compiler; - struct fd_device *dev; -}; -define_cast(backend, a4xx_backend); - -/* - * Backend implementation: - */ - -static struct kernel * -a4xx_assemble(struct backend *b, FILE *in) -{ - struct a4xx_backend *a4xx_backend = to_a4xx_backend(b); - struct ir3_kernel *ir3_kernel = ir3_asm_assemble(a4xx_backend->compiler, in); - ir3_kernel->backend = b; - return &ir3_kernel->base; -} - -static void -a4xx_disassemble(struct kernel *kernel, FILE *out) -{ - ir3_asm_disassemble(to_ir3_kernel(kernel), out); -} - -static void -cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) -{ - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct ir3_shader_variant *v = ir3_kernel->v; - const struct ir3_info *i = &v->info; - enum a3xx_threadsize thrsz = i->double_threadsize ? FOUR_QUADS : TWO_QUADS; - - OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, 0x00000012); - - OUT_WFI(ring); - - OUT_PKT0(ring, REG_A4XX_SP_MODE_CONTROL, 1); - OUT_RING(ring, 0x0000001e); - - OUT_PKT0(ring, REG_A4XX_TPL1_TP_MODE_CONTROL, 1); - OUT_RING(ring, 0x00000038); - - OUT_PKT0(ring, REG_A4XX_TPL1_TP_FS_TEX_COUNT, 1); - OUT_RING(ring, 0x00000000); - - OUT_WFI(ring); - - OUT_PKT0(ring, REG_A4XX_HLSQ_MODE_CONTROL, 1); - OUT_RING(ring, 0x00000003); - - OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 1); - OUT_RING(ring, 0x080005f0); - - OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); - OUT_RING(ring, 0x00000038); - - OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); - OUT_RING(ring, 0x00860010); - // OUT_RING(ring, 0x00920000); - - OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); - OUT_RING(ring, 0x000004ff); - // OUT_RING(ring, 0x00000260); - - OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG1, 1); - OUT_RING(ring, 0x80000000); - - OUT_PKT0(ring, REG_A4XX_SP_CS_CTRL_REG0, 1); - OUT_RING(ring, - A4XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | - A4XX_SP_CS_CTRL_REG0_SUPERTHREADMODE | - A4XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | - A4XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1)); - - OUT_PKT0(ring, REG_A4XX_HLSQ_CS_CONTROL_REG, 1); - OUT_RING(ring, A4XX_HLSQ_CS_CONTROL_REG_CONSTOBJECTOFFSET(0) | - A4XX_HLSQ_CS_CONTROL_REG_SHADEROBJOFFSET(0) | - A4XX_HLSQ_CS_CONTROL_REG_ENABLED | - A4XX_HLSQ_CS_CONTROL_REG_INSTRLENGTH(1) | - COND(v->has_ssbo, A4XX_HLSQ_CS_CONTROL_REG_SSBO_ENABLE) | - A4XX_HLSQ_CS_CONTROL_REG_CONSTLENGTH(v->constlen / 4)); - - OUT_PKT0(ring, REG_A4XX_SP_CS_OBJ_START, 1); - OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START */ - - OUT_PKT0(ring, REG_A4XX_SP_CS_LENGTH_REG, 1); - OUT_RING(ring, v->instrlen); - - uint32_t local_invocation_id, work_group_id, num_wg_id; - local_invocation_id = - ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); - 
work_group_id = ir3_kernel->info.wgid; - num_wg_id = ir3_kernel->info.numwg; - - OUT_PKT0(ring, REG_A4XX_HLSQ_CL_CONTROL_0, 2); - OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_0_WGIDCONSTID(work_group_id) | - A4XX_HLSQ_CL_CONTROL_0_UNK12CONSTID(regid(63, 0)) | - A4XX_HLSQ_CL_CONTROL_0_LOCALIDREGID(local_invocation_id)); - OUT_RING(ring, A4XX_HLSQ_CL_CONTROL_1_UNK0CONSTID(regid(63, 0)) | - A4XX_HLSQ_CL_CONTROL_1_UNK12CONSTID(regid(63, 0))); - - OUT_PKT0(ring, REG_A4XX_HLSQ_CL_KERNEL_CONST, 1); - OUT_RING(ring, A4XX_HLSQ_CL_KERNEL_CONST_UNK0CONSTID(regid(63, 0)) | - A4XX_HLSQ_CL_KERNEL_CONST_NUMWGCONSTID(num_wg_id)); - - OUT_PKT0(ring, REG_A4XX_HLSQ_CL_WG_OFFSET, 1); - OUT_RING(ring, A4XX_HLSQ_CL_WG_OFFSET_UNK0CONSTID(regid(63, 0))); - - OUT_PKT3(ring, CP_LOAD_STATE4, 2); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | - CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) | - CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SHADER) | - CP_LOAD_STATE4_0_NUM_UNIT(v->instrlen)); - OUT_RELOC(ring, v->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); -} - -static void -emit_const(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t constid, uint32_t sizedwords, - const uint32_t *dwords) -{ - uint32_t align_sz; - - debug_assert((constid % 4) == 0); - - /* Overwrite appropriate entries with buffer addresses */ - struct fd_bo **replacements = calloc(sizedwords, sizeof(struct fd_bo *)); - for (int i = 0; i < MAX_BUFS; i++) { - if (kernel->buf_addr_regs[i] != INVALID_REG) { - int idx = kernel->buf_addr_regs[i]; - assert(idx < sizedwords); - - replacements[idx] = kernel->bufs[i]; - } - } - - align_sz = align(sizedwords, 4); - - OUT_PKT3(ring, CP_LOAD_STATE4, 2 + align_sz); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(constid / 4) | - CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | - CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SHADER) | - CP_LOAD_STATE4_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); - OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | - CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS)); - for (unsigned i = 0; i < sizedwords; i++) { - if (replacements[i]) - OUT_RELOC(ring, replacements[i], 0, 0, 0); - else - OUT_RING(ring, dwords[i]); - } - - /* Zero-pad to multiple of 4 dwords */ - for (uint32_t i = sizedwords; i < align_sz; i++) { - OUT_RING(ring, 0); - } - - free(replacements); -} - -static void -cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, - uint32_t grid[3]) -{ - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct ir3_shader_variant *v = ir3_kernel->v; - - const struct ir3_const_state *const_state = ir3_const_state(v); - uint32_t base = const_state->offsets.immediate; - int size = DIV_ROUND_UP(const_state->immediates_count, 4); - - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, v->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - emit_const(ring, kernel, base, size, const_state->immediates); - } -} - -static void -cs_ibo_emit(struct fd_ringbuffer *ring, struct fd_submit *submit, - struct kernel *kernel) -{ - OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (4 * kernel->num_bufs)); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | - CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | - CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SSBO) | - CP_LOAD_STATE4_0_NUM_UNIT(kernel->num_bufs)); - OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER) | - CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); - for (unsigned i = 0; i < kernel->num_bufs; i++) { - OUT_RELOC(ring, kernel->bufs[i], 0, 0, 0); -#if 1 - OUT_RING(ring, 0); - OUT_RING(ring, 0); - OUT_RING(ring, 
0); -#else - OUT_RING(ring, kernel->buf_sizes[i]); - OUT_RING(ring, kernel->buf_sizes[i]); - OUT_RING(ring, 0x00000004); -#endif - } - - OUT_PKT3(ring, CP_LOAD_STATE4, 2 + (2 * kernel->num_bufs)); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | - CP_LOAD_STATE4_0_STATE_SRC(SS4_DIRECT) | - CP_LOAD_STATE4_0_STATE_BLOCK(SB4_CS_SSBO) | - CP_LOAD_STATE4_0_NUM_UNIT(kernel->num_bufs)); - OUT_RING(ring, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS) | - CP_LOAD_STATE4_1_EXT_SRC_ADDR(0)); - for (unsigned i = 0; i < kernel->num_bufs; i++) { - unsigned sz = kernel->buf_sizes[i]; - - /* width is in dwords, overflows into height: */ - sz /= 4; - -#if 1 - OUT_RING(ring, A4XX_SSBO_1_0_WIDTH(sz)); - OUT_RING(ring, A4XX_SSBO_1_1_HEIGHT(sz >> 16)); -#else - OUT_RING(ring, A4XX_SSBO_1_0_WIDTH(sz) | - A4XX_SSBO_1_0_FMT(RB4_R32_UINT) | - A4XX_SSBO_1_0_CPP(4)); - OUT_RING(ring, A4XX_SSBO_1_1_HEIGHT(DIV_ROUND_UP(sz, 1 << 16)) | - A4XX_SSBO_1_1_DEPTH(1)); -#endif - } -} - -static void -a4xx_emit_grid(struct kernel *kernel, uint32_t grid[3], - struct fd_submit *submit) -{ - struct fd_ringbuffer *ring = fd_submit_new_ringbuffer( - submit, 0, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); - - cs_program_emit(ring, kernel); - cs_const_emit(ring, kernel, grid); - cs_ibo_emit(ring, submit, kernel); - - const unsigned *local_size = kernel->local_size; - const unsigned *num_groups = grid; - - unsigned work_dim = 0; - for (int i = 0; i < 3; i++) { - if (!grid[i]) - break; - work_dim++; - } - - OUT_PKT0(ring, REG_A4XX_HLSQ_CL_NDRANGE_0, 7); - OUT_RING(ring, A4XX_HLSQ_CL_NDRANGE_0_KERNELDIM(work_dim) | - A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | - A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | - A4XX_HLSQ_CL_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); - OUT_RING(ring, - A4XX_HLSQ_CL_NDRANGE_1_SIZE_X(local_size[0] * num_groups[0])); - OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_2_GLOBALOFF_X */ - OUT_RING(ring, - A4XX_HLSQ_CL_NDRANGE_3_SIZE_Y(local_size[1] * num_groups[1])); - OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_4_GLOBALOFF_Y */ - OUT_RING(ring, - A4XX_HLSQ_CL_NDRANGE_5_SIZE_Z(local_size[2] * num_groups[2])); - OUT_RING(ring, 0); /* HLSQ_CL_NDRANGE_6_GLOBALOFF_Z */ - -#if 1 - OUT_PKT3(ring, CP_EXEC_CS, 4); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0])); - OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(grid[1])); - OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(grid[2])); -#else - OUT_PKT0(ring, REG_A4XX_HLSQ_CL_KERNEL_GROUP_X, 3); - OUT_RING(ring, grid[0]); /* HLSQ_CL_KERNEL_GROUP_X */ - OUT_RING(ring, grid[1]); /* HLSQ_CL_KERNEL_GROUP_Y */ - OUT_RING(ring, grid[2]); /* HLSQ_CL_KERNEL_GROUP_Z */ - - OUT_PKT3(ring, CP_RUN_OPENCL, 1); - OUT_RING(ring, 0); -#endif - - OUT_WFI(ring); - - /* TODO: cache_flush */ -} - -struct backend * -a4xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id) -{ - struct a4xx_backend *a4xx_backend = calloc(1, sizeof(*a4xx_backend)); - - a4xx_backend->base = (struct backend){ - .assemble = a4xx_assemble, - .disassemble = a4xx_disassemble, - .emit_grid = a4xx_emit_grid, - }; - - a4xx_backend->compiler = ir3_compiler_create(dev, dev_id, false); - a4xx_backend->dev = dev; - - return &a4xx_backend->base; -} diff --git a/lib/mesa/src/freedreno/computerator/a6xx.c b/lib/mesa/src/freedreno/computerator/a6xx.c deleted file mode 100644 index 657e83cfd..000000000 --- a/lib/mesa/src/freedreno/computerator/a6xx.c +++ /dev/null @@ -1,551 +0,0 @@ -/* - * Copyright © 2020 Google, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ir3/ir3_compiler.h" - -#include "util/u_math.h" - -#include "adreno_pm4.xml.h" -#include "adreno_common.xml.h" -#include "a6xx.xml.h" - -#include "common/freedreno_dev_info.h" - -#include "ir3_asm.h" -#include "main.h" - -struct a6xx_backend { - struct backend base; - - struct ir3_compiler *compiler; - struct fd_device *dev; - - const struct fd_dev_info *info; - - unsigned seqno; - struct fd_bo *control_mem; - - struct fd_bo *query_mem; - const struct perfcntr *perfcntrs; - unsigned num_perfcntrs; -}; -define_cast(backend, a6xx_backend); - -/* - * Data structures shared with GPU: - */ - -/* This struct defines the layout of the fd6_context::control buffer: */ -struct fd6_control { - uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */ - uint32_t _pad0; - volatile uint32_t vsc_overflow; - uint32_t _pad1; - /* flag set from cmdstream when VSC overflow detected: */ - uint32_t vsc_scratch; - uint32_t _pad2; - uint32_t _pad3; - uint32_t _pad4; - - /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */ - struct { - uint32_t offset; - uint32_t pad[7]; - } flush_base[4]; -}; - -#define control_ptr(a6xx_backend, member) \ - (a6xx_backend)->control_mem, offsetof(struct fd6_control, member), 0, 0 - -struct PACKED fd6_query_sample { - uint64_t start; - uint64_t result; - uint64_t stop; -}; - -/* offset of a single field of an array of fd6_query_sample: */ -#define query_sample_idx(a6xx_backend, idx, field) \ - (a6xx_backend)->query_mem, \ - (idx * sizeof(struct fd6_query_sample)) + \ - offsetof(struct fd6_query_sample, field), \ - 0, 0 - -/* - * Backend implementation: - */ - -static struct kernel * -a6xx_assemble(struct backend *b, FILE *in) -{ - struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); - struct ir3_kernel *ir3_kernel = ir3_asm_assemble(a6xx_backend->compiler, in); - ir3_kernel->backend = b; - return &ir3_kernel->base; -} - -static void -a6xx_disassemble(struct kernel *kernel, FILE *out) -{ - ir3_asm_disassemble(to_ir3_kernel(kernel), out); -} - -static void -cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel) -{ - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); - struct ir3_shader_variant *v = ir3_kernel->v; - const struct ir3_info *i = &v->info; - enum a6xx_threadsize thrsz = i->double_threadsize ? 
THREAD128 : THREAD64; - - OUT_PKT4(ring, REG_A6XX_SP_MODE_CONTROL, 1); - OUT_RING(ring, A6XX_SP_MODE_CONTROL_CONSTANT_DEMOTION_ENABLE | 4); - - OUT_PKT4(ring, REG_A6XX_SP_PERFCTR_ENABLE, 1); - OUT_RING(ring, A6XX_SP_PERFCTR_ENABLE_CS); - - OUT_PKT4(ring, REG_A6XX_SP_FLOAT_CNTL, 1); - OUT_RING(ring, 0); - - OUT_PKT4(ring, REG_A6XX_HLSQ_INVALIDATE_CMD, 1); - OUT_RING( - ring, - A6XX_HLSQ_INVALIDATE_CMD_VS_STATE | A6XX_HLSQ_INVALIDATE_CMD_HS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_DS_STATE | A6XX_HLSQ_INVALIDATE_CMD_GS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_FS_STATE | A6XX_HLSQ_INVALIDATE_CMD_CS_STATE | - A6XX_HLSQ_INVALIDATE_CMD_CS_IBO | A6XX_HLSQ_INVALIDATE_CMD_GFX_IBO); - - unsigned constlen = align(v->constlen, 4); - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1); - OUT_RING(ring, - A6XX_HLSQ_CS_CNTL_CONSTLEN(constlen) | A6XX_HLSQ_CS_CNTL_ENABLED); - - OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2); - OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED | - A6XX_SP_CS_CONFIG_NIBO(kernel->num_bufs) | - A6XX_SP_CS_CONFIG_NTEX(v->num_samp) | - A6XX_SP_CS_CONFIG_NSAMP(v->num_samp)); /* SP_VS_CONFIG */ - OUT_RING(ring, v->instrlen); /* SP_VS_INSTRLEN */ - - OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1); - OUT_RING(ring, - A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) | - A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) | - A6XX_SP_CS_CTRL_REG0_HALFREGFOOTPRINT(i->max_half_reg + 1) | - COND(v->mergedregs, A6XX_SP_CS_CTRL_REG0_MERGEDREGS) | - COND(ir3_kernel->info.early_preamble, A6XX_SP_CS_CTRL_REG0_EARLYPREAMBLE) | - A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(ir3_shader_branchstack_hw(v))); - - OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); - OUT_RING(ring, 0x41); - - if (a6xx_backend->info->a6xx.has_lpac) { - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1); - OUT_RING(ring, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(1) | - A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6); - } - - uint32_t local_invocation_id, work_group_id; - local_invocation_id = - ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); - work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORKGROUP_ID); - - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2); - OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); - OUT_RING(ring, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | - A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz)); - - if (a6xx_backend->info->a6xx.has_lpac) { - OUT_PKT4(ring, REG_A6XX_SP_CS_CNTL_0, 2); - OUT_RING(ring, A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) | - A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | - A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | - A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); - OUT_RING(ring, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | - A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz)); - } - - OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); - OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */ - - OUT_PKT4(ring, REG_A6XX_SP_CS_INSTRLEN, 1); - OUT_RING(ring, v->instrlen); - - OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START, 2); - OUT_RELOC(ring, v->bo, 0, 0, 0); - - uint32_t shader_preload_size = - MIN2(v->instrlen, a6xx_backend->info->a6xx.instr_cache_size); - OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size)); - OUT_RELOC(ring, v->bo, 0, 0, 0); - - if 
(v->pvtmem_size > 0) { - uint32_t per_fiber_size = ALIGN(v->pvtmem_size, 512); - uint32_t per_sp_size = - ALIGN(per_fiber_size * a6xx_backend->info->a6xx.fibers_per_sp, 1 << 12); - uint32_t total_size = per_sp_size * a6xx_backend->info->num_sp_cores; - - struct fd_bo *pvtmem = fd_bo_new(a6xx_backend->dev, total_size, 0, "pvtmem"); - OUT_PKT4(ring, REG_A6XX_SP_CS_PVT_MEM_PARAM, 4); - OUT_RING(ring, A6XX_SP_CS_PVT_MEM_PARAM_MEMSIZEPERITEM(per_fiber_size)); - OUT_RELOC(ring, pvtmem, 0, 0, 0); - OUT_RING(ring, A6XX_SP_CS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(per_sp_size) | - COND(v->pvtmem_per_wave, - A6XX_SP_CS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT)); - - OUT_PKT4(ring, REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET, 1); - OUT_RING(ring, A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET_OFFSET(per_sp_size)); - } -} - -static void -emit_const(struct fd_ringbuffer *ring, uint32_t regid, uint32_t sizedwords, - const uint32_t *dwords) -{ - uint32_t align_sz; - - assert((regid % 4) == 0); - - align_sz = align(sizedwords, 4); - - OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3 + align_sz); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4))); - OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - - for (uint32_t i = 0; i < sizedwords; i++) { - OUT_RING(ring, dwords[i]); - } - - /* Zero-pad to multiple of 4 dwords */ - for (uint32_t i = sizedwords; i < align_sz; i++) { - OUT_RING(ring, 0); - } -} - -static void -cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, - uint32_t grid[3]) -{ - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct ir3_shader_variant *v = ir3_kernel->v; - - const struct ir3_const_state *const_state = ir3_const_state(v); - uint32_t base = const_state->offsets.immediate; - int size = DIV_ROUND_UP(const_state->immediates_count, 4); - - if (ir3_kernel->info.numwg != INVALID_REG) { - assert((ir3_kernel->info.numwg & 0x3) == 0); - int idx = ir3_kernel->info.numwg >> 2; - const_state->immediates[idx * 4 + 0] = grid[0]; - const_state->immediates[idx * 4 + 1] = grid[1]; - const_state->immediates[idx * 4 + 2] = grid[2]; - } - - for (int i = 0; i < MAX_BUFS; i++) { - if (kernel->buf_addr_regs[i] != INVALID_REG) { - assert((kernel->buf_addr_regs[i] & 0x3) == 0); - int idx = kernel->buf_addr_regs[i] >> 2; - - uint64_t iova = fd_bo_get_iova(kernel->bufs[i]); - - const_state->immediates[idx * 4 + 1] = iova >> 32; - const_state->immediates[idx * 4 + 0] = (iova << 32) >> 32; - } - } - - /* truncate size to avoid writing constants that shader - * does not use: - */ - size = MIN2(size + base, v->constlen) - base; - - /* convert out of vec4: */ - base *= 4; - size *= 4; - - if (size > 0) { - emit_const(ring, base, size, const_state->immediates); - } -} - -static void -cs_ibo_emit(struct fd_ringbuffer *ring, struct fd_submit *submit, - struct kernel *kernel) -{ - struct fd_ringbuffer *state = fd_submit_new_ringbuffer( - submit, kernel->num_bufs * 16 * 4, FD_RINGBUFFER_STREAMING); - - for (unsigned i = 0; i < kernel->num_bufs; i++) { - /* size is encoded with low 15b in WIDTH and high bits in HEIGHT, - * in units of elements: - */ - unsigned sz = kernel->buf_sizes[i]; - unsigned width = sz & MASK(15); - unsigned height = sz >> 15; - - OUT_RING(state, A6XX_TEX_CONST_0_FMT(FMT6_32_UINT) | A6XX_TEX_CONST_0_TILE_MODE(0)); - OUT_RING(state, A6XX_TEX_CONST_1_WIDTH(width) | 
A6XX_TEX_CONST_1_HEIGHT(height)); - OUT_RING(state, A6XX_TEX_CONST_2_PITCH(0) | A6XX_TEX_CONST_2_BUFFER | - A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER)); - OUT_RING(state, A6XX_TEX_CONST_3_ARRAY_PITCH(0)); - OUT_RELOC(state, kernel->bufs[i], 0, 0, 0); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - OUT_RING(state, 0x00000000); - } - - OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3); - OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(kernel->num_bufs)); - OUT_RB(ring, state); - - OUT_PKT4(ring, REG_A6XX_SP_CS_IBO, 2); - OUT_RB(ring, state); - - OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1); - OUT_RING(ring, kernel->num_bufs); - - fd_ringbuffer_del(state); -} - -static inline unsigned -event_write(struct fd_ringbuffer *ring, struct kernel *kernel, - enum vgt_event_type evt, bool timestamp) -{ - unsigned seqno = 0; - - OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1); - OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt)); - if (timestamp) { - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); - seqno = ++a6xx_backend->seqno; - OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); /* ADDR_LO/HI */ - OUT_RING(ring, seqno); - } - - return seqno; -} - -static inline void -cache_flush(struct fd_ringbuffer *ring, struct kernel *kernel) -{ - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); - unsigned seqno; - - seqno = event_write(ring, kernel, RB_DONE_TS, true); - - OUT_PKT7(ring, CP_WAIT_REG_MEM, 6); - OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | - CP_WAIT_REG_MEM_0_POLL_MEMORY); - OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); - OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno)); - OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0)); - OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16)); - - seqno = event_write(ring, kernel, CACHE_FLUSH_TS, true); - - OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4); - OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0)); - OUT_RELOC(ring, control_ptr(a6xx_backend, seqno)); - OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno)); -} - -static void -a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], - struct fd_submit *submit) -{ - struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel); - struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend); - struct fd_ringbuffer *ring = fd_submit_new_ringbuffer( - submit, 0, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE); - - cs_program_emit(ring, kernel); - cs_const_emit(ring, kernel, grid); - cs_ibo_emit(ring, submit, kernel); - - OUT_PKT7(ring, CP_SET_MARKER, 1); - OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE)); - - const unsigned *local_size = kernel->local_size; - const unsigned *num_groups = grid; - - unsigned work_dim = 0; - for (int i = 0; i < 3; i++) { - if (!grid[i]) - break; - work_dim++; - } - - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7); - OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) | - A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1)); - OUT_RING(ring, - 
A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */ - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */ - OUT_RING(ring, - A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2])); - OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */ - - OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3); - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */ - OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */ - - if (a6xx_backend->num_perfcntrs > 0) { - a6xx_backend->query_mem = fd_bo_new( - a6xx_backend->dev, - a6xx_backend->num_perfcntrs * sizeof(struct fd6_query_sample), 0, "query"); - - /* configure the performance counters to count the requested - * countables: - */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; - - OUT_PKT4(ring, counter->select_reg, 1); - OUT_RING(ring, counter->selector); - } - - OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); - - /* and snapshot the start values: */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; - - OUT_PKT7(ring, CP_REG_TO_MEM, 3); - OUT_RING(ring, CP_REG_TO_MEM_0_64B | - CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); - } - } - - OUT_PKT7(ring, CP_EXEC_CS, 4); - OUT_RING(ring, 0x00000000); - OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0])); - OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(grid[1])); - OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(grid[2])); - - OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); - - if (a6xx_backend->num_perfcntrs > 0) { - /* snapshot the end values: */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - const struct perfcntr *counter = &a6xx_backend->perfcntrs[i]; - - OUT_PKT7(ring, CP_REG_TO_MEM, 3); - OUT_RING(ring, CP_REG_TO_MEM_0_64B | - CP_REG_TO_MEM_0_REG(counter->counter_reg_lo)); - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); - } - - /* and compute the result: */ - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - /* result += stop - start: */ - OUT_PKT7(ring, CP_MEM_TO_MEM, 9); - OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C); - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* dst */ - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, result)); /* srcA */ - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, stop)); /* srcB */ - OUT_RELOC(ring, query_sample_idx(a6xx_backend, i, start)); /* srcC */ - } - } - - cache_flush(ring, kernel); -} - -static void -a6xx_set_perfcntrs(struct backend *b, const struct perfcntr *perfcntrs, - unsigned num_perfcntrs) -{ - struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); - - a6xx_backend->perfcntrs = perfcntrs; - a6xx_backend->num_perfcntrs = num_perfcntrs; -} - -static void -a6xx_read_perfcntrs(struct backend *b, uint64_t *results) -{ - struct a6xx_backend *a6xx_backend = to_a6xx_backend(b); - - fd_bo_cpu_prep(a6xx_backend->query_mem, NULL, FD_BO_PREP_READ); - struct fd6_query_sample *samples = fd_bo_map(a6xx_backend->query_mem); - - for (unsigned i = 0; i < a6xx_backend->num_perfcntrs; i++) { - results[i] = samples[i].result; - } -} - -struct backend * -a6xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id) -{ - struct a6xx_backend *a6xx_backend = calloc(1, sizeof(*a6xx_backend)); - - a6xx_backend->base = 
(struct backend){ - .assemble = a6xx_assemble, - .disassemble = a6xx_disassemble, - .emit_grid = a6xx_emit_grid, - .set_perfcntrs = a6xx_set_perfcntrs, - .read_perfcntrs = a6xx_read_perfcntrs, - }; - - a6xx_backend->compiler = ir3_compiler_create(dev, dev_id, - &(struct ir3_compiler_options){}); - a6xx_backend->dev = dev; - - a6xx_backend->info = fd_dev_info(dev_id); - - a6xx_backend->control_mem = - fd_bo_new(dev, 0x1000, 0, "control"); - - return &a6xx_backend->base; -} diff --git a/lib/mesa/src/freedreno/computerator/ir3_asm.c b/lib/mesa/src/freedreno/computerator/ir3_asm.c deleted file mode 100644 index e0f3c9bc5..000000000 --- a/lib/mesa/src/freedreno/computerator/ir3_asm.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright © 2020 Google, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ir3/ir3_assembler.h" -#include "ir3/ir3_compiler.h" - -#include "ir3_asm.h" - -struct ir3_kernel * -ir3_asm_assemble(struct ir3_compiler *c, FILE *in) -{ - struct ir3_kernel *kernel = calloc(1, sizeof(*kernel)); - struct ir3_shader *shader = ir3_parse_asm(c, &kernel->info, in); - if (!shader) - errx(-1, "assembler failed"); - struct ir3_shader_variant *v = shader->variants; - - kernel->v = v; - kernel->bin = v->bin; - - kernel->base.local_size[0] = v->local_size[0]; - kernel->base.local_size[1] = v->local_size[1]; - kernel->base.local_size[2] = v->local_size[2]; - kernel->base.num_bufs = kernel->info.num_bufs; - memcpy(kernel->base.buf_sizes, kernel->info.buf_sizes, - sizeof(kernel->base.buf_sizes)); - memcpy(kernel->base.buf_addr_regs, kernel->info.buf_addr_regs, - sizeof(kernel->base.buf_addr_regs)); - - unsigned sz = v->info.size; - - v->bo = fd_bo_new(c->dev, sz, 0, "%s", ir3_shader_stage(v)); - - memcpy(fd_bo_map(v->bo), kernel->bin, sz); - - /* Always include shaders in kernel crash dumps. */ - fd_bo_mark_for_dump(v->bo); - - return kernel; -} - -void -ir3_asm_disassemble(struct ir3_kernel *k, FILE *out) -{ - ir3_shader_disasm(k->v, k->bin, out); -} diff --git a/lib/mesa/src/freedreno/computerator/main.c b/lib/mesa/src/freedreno/computerator/main.c deleted file mode 100644 index 6c4f14534..000000000 --- a/lib/mesa/src/freedreno/computerator/main.c +++ /dev/null @@ -1,314 +0,0 @@ -/* - * Copyright © 2020 Google, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <getopt.h> -#include <inttypes.h> -#include <locale.h> -#include <stdlib.h> -#include <xf86drm.h> - -#include "util/u_math.h" - -#include "perfcntrs/freedreno_perfcntr.h" - -#include "main.h" - -static void -dump_float(void *buf, int sz) -{ - uint8_t *ptr = (uint8_t *)buf; - uint8_t *end = ptr + sz - 3; - int i = 0; - - while (ptr < end) { - uint32_t d = 0; - - printf((i % 8) ? " " : "\t"); - - d |= *(ptr++) << 0; - d |= *(ptr++) << 8; - d |= *(ptr++) << 16; - d |= *(ptr++) << 24; - - printf("%8f", uif(d)); - - if ((i % 8) == 7) { - printf("\n"); - } - - i++; - } - - if (i % 8) { - printf("\n"); - } -} - -static void -dump_hex(void *buf, int sz) -{ - uint8_t *ptr = (uint8_t *)buf; - uint8_t *end = ptr + sz; - int i = 0; - - while (ptr < end) { - uint32_t d = 0; - - printf((i % 8) ? 
" " : "\t"); - - d |= *(ptr++) << 0; - d |= *(ptr++) << 8; - d |= *(ptr++) << 16; - d |= *(ptr++) << 24; - - printf("%08x", d); - - if ((i % 8) == 7) { - printf("\n"); - } - - i++; - } - - if (i % 8) { - printf("\n"); - } -} - -static const char *shortopts = "df:g:hp:"; - -static const struct option longopts[] = { - {"disasm", no_argument, 0, 'd'}, {"file", required_argument, 0, 'f'}, - {"groups", required_argument, 0, 'g'}, {"help", no_argument, 0, 'h'}, - {"perfcntr", required_argument, 0, 'p'}, {0, 0, 0, 0}}; - -static void -usage(const char *name) -{ - printf( - "Usage: %s [-dfgh]\n" - "\n" - "options:\n" - " -d, --disasm print disassembled shader\n" - " -f, --file=FILE read shader from file (instead of stdin)\n" - " -g, --groups=X,Y,Z use specified group size\n" - " -h, --help show this message\n" - " -p, --perfcntr=LIST sample specified performance counters " - "(comma\n" - " separated list)\n", - name); -} - -/* performance counter description: */ -static unsigned num_groups; -static const struct fd_perfcntr_group *groups; - -/* Track enabled counters per group: */ -static unsigned *enabled_counters; - -static void -setup_counter(const char *name, struct perfcntr *c) -{ - for (int i = 0; i < num_groups; i++) { - const struct fd_perfcntr_group *group = &groups[i]; - - for (int j = 0; j < group->num_countables; j++) { - const struct fd_perfcntr_countable *countable = &group->countables[j]; - - if (strcmp(name, countable->name) != 0) - continue; - - /* - * Allocate a counter to use to monitor the requested countable: - */ - if (enabled_counters[i] >= group->num_counters) { - errx(-1, "Too many counters selected in group: %s", group->name); - } - - unsigned idx = enabled_counters[i]++; - const struct fd_perfcntr_counter *counter = &group->counters[idx]; - - /* - * And initialize the perfcntr struct, pulling together the info - * about selected counter and countable, to simplify life for the - * backend: - */ - c->name = name; - c->select_reg = counter->select_reg; - c->counter_reg_lo = counter->counter_reg_lo; - c->counter_reg_hi = counter->counter_reg_hi; - c->selector = countable->selector; - - return; - } - } - - errx(-1, "could not find countable: %s", name); -} - -static struct perfcntr * -parse_perfcntrs(const struct fd_dev_id *dev_id, const char *perfcntrstr, - unsigned *num_perfcntrs) -{ - struct perfcntr *counters = NULL; - char *cnames, *s; - unsigned cnt = 0; - - groups = fd_perfcntrs(dev_id, &num_groups); - enabled_counters = calloc(num_groups, sizeof(enabled_counters[0])); - - cnames = strdup(perfcntrstr); - while ((s = strstr(cnames, ","))) { - char *name = cnames; - s[0] = '\0'; - cnames = &s[1]; - - counters = realloc(counters, ++cnt * sizeof(counters[0])); - setup_counter(name, &counters[cnt - 1]); - } - - char *name = cnames; - counters = realloc(counters, ++cnt * sizeof(counters[0])); - setup_counter(name, &counters[cnt - 1]); - - *num_perfcntrs = cnt; - - return counters; -} - -int -main(int argc, char **argv) -{ - FILE *in = stdin; - const char *perfcntrstr = NULL; - struct perfcntr *perfcntrs = NULL; - unsigned num_perfcntrs = 0; - bool disasm = false; - uint32_t grid[3] = {0}; - int opt, ret; - - setlocale(LC_NUMERIC, "en_US.UTF-8"); - - while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != - -1) { - switch (opt) { - case 'd': - disasm = true; - break; - case 'f': - in = fopen(optarg, "r"); - if (!in) - err(1, "could not open '%s'", optarg); - break; - case 'g': - ret = sscanf(optarg, "%u,%u,%u", &grid[0], &grid[1], &grid[2]); - if (ret != 3) - 
goto usage; - break; - case 'h': - goto usage; - case 'p': - perfcntrstr = optarg; - break; - default: - printf("unrecognized arg: %c\n", opt); - goto usage; - } - } - - struct fd_device *dev = fd_device_open(); - struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D); - - const struct fd_dev_id *dev_id = fd_pipe_dev_id(pipe); - - printf("got gpu: %s\n", fd_dev_name(dev_id)); - - struct backend *backend; - switch (fd_dev_gen(dev_id)) { - case 4: - backend = a4xx_init(dev, dev_id); - break; - case 6: - backend = a6xx_init(dev, dev_id); - break; - default: - err(1, "unsupported gpu generation: a%uxx", fd_dev_gen(dev_id)); - } - - struct kernel *kernel = backend->assemble(backend, in); - printf("localsize: %dx%dx%d\n", kernel->local_size[0], kernel->local_size[1], - kernel->local_size[2]); - for (int i = 0; i < kernel->num_bufs; i++) { - printf("buf[%d]: size=%u\n", i, kernel->buf_sizes[i]); - kernel->bufs[i] = fd_bo_new(dev, kernel->buf_sizes[i] * 4, 0, "buf[%d]", i); - } - - if (disasm) - backend->disassemble(kernel, stdout); - - if (grid[0] == 0) - return 0; - - struct fd_submit *submit = fd_submit_new(pipe); - - if (perfcntrstr) { - if (!backend->set_perfcntrs) { - err(1, "performance counters not supported"); - } - perfcntrs = parse_perfcntrs(dev_id, perfcntrstr, &num_perfcntrs); - backend->set_perfcntrs(backend, perfcntrs, num_perfcntrs); - } - - backend->emit_grid(kernel, grid, submit); - - struct fd_submit_fence fence = {}; - util_queue_fence_init(&fence.ready); - - fd_submit_flush(submit, -1, &fence); - - util_queue_fence_wait(&fence.ready); - - for (int i = 0; i < kernel->num_bufs; i++) { - fd_bo_cpu_prep(kernel->bufs[i], pipe, FD_BO_PREP_READ); - void *map = fd_bo_map(kernel->bufs[i]); - - printf("buf[%d]:\n", i); - dump_hex(map, kernel->buf_sizes[i] * 4); - dump_float(map, kernel->buf_sizes[i] * 4); - } - - if (perfcntrstr) { - uint64_t results[num_perfcntrs]; - backend->read_perfcntrs(backend, results); - - for (unsigned i = 0; i < num_perfcntrs; i++) { - printf("%s:\t%'" PRIu64 "\n", perfcntrs[i].name, results[i]); - } - } - - return 0; - -usage: - usage(argv[0]); - return -1; -} diff --git a/lib/mesa/src/freedreno/computerator/main.h b/lib/mesa/src/freedreno/computerator/main.h index 795aad842..d43ba7b63 100644 --- a/lib/mesa/src/freedreno/computerator/main.h +++ b/lib/mesa/src/freedreno/computerator/main.h @@ -81,6 +81,7 @@ struct backend { } struct backend *a4xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id); +template<chip CHIP> struct backend *a6xx_init(struct fd_device *dev, const struct fd_dev_id *dev_id); /* for conditionally setting boolean flag(s): */ diff --git a/lib/mesa/src/freedreno/computerator/meson.build b/lib/mesa/src/freedreno/computerator/meson.build index 930220144..dba703cf1 100644 --- a/lib/mesa/src/freedreno/computerator/meson.build +++ b/lib/mesa/src/freedreno/computerator/meson.build @@ -19,14 +19,18 @@ # SOFTWARE. 
computerator_files = [ - 'a4xx.c', - 'a6xx.c', - 'ir3_asm.c', - 'main.c', + 'a4xx.cc', + 'a6xx.cc', + 'ir3_asm.cc', + 'main.cc', freedreno_xml_header_files, ir3_parser[1], ] +computerator_cpp_args = cpp.get_supported_arguments([ + '-Wno-sign-compare', +]) + computerator = executable( 'computerator', computerator_files, @@ -51,6 +55,7 @@ computerator = executable( # We don't actually use nir, but ir3 wants some nir headers: idep_nir, ], + cpp_args : [computerator_cpp_args], build_by_default : with_tools.contains('freedreno'), install : with_tools.contains('freedreno'), ) diff --git a/lib/mesa/src/freedreno/ir3/ir3_dce.c b/lib/mesa/src/freedreno/ir3/ir3_dce.c index 02cd29b68..6e0654ce9 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_dce.c +++ b/lib/mesa/src/freedreno/ir3/ir3_dce.c @@ -112,13 +112,6 @@ find_and_remove_unused(struct ir3 *ir, struct ir3_shader_variant *so) foreach_block (block, &ir->block_list) { foreach_instr (instr, &block->instr_list) { if (instr->opc == OPC_META_INPUT) { - /* special case, if pre-fs texture fetch used, we cannot - * eliminate the barycentric i/j input - */ - if (so->num_sampler_prefetch && - instr->input.sysval == SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL) - continue; - /* Without GS header geometry shader is never invoked. */ if (instr->input.sysval == SYSTEM_VALUE_GS_HEADER_IR3) continue; diff --git a/lib/mesa/src/freedreno/ir3/ir3_lexer.l b/lib/mesa/src/freedreno/ir3/ir3_lexer.l index 07843a848..7bd6cdfe1 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_lexer.l +++ b/lib/mesa/src/freedreno/ir3/ir3_lexer.l @@ -96,6 +96,7 @@ static int parse_reg(const char *str) [0-9]+"."[0-9]+ ir3_yylval.flt = strtod(yytext, NULL); return T_FLOAT; [0-9]* ir3_yylval.num = strtoul(yytext, NULL, 0); return T_INT; "0x"[0-9a-fA-F]* ir3_yylval.num = strtoul(yytext, NULL, 0); return T_HEX; +"raw 0x"[0-9a-fA-F]* ir3_yylval.u64 = strtoull(yytext + 4, NULL, 0); return T_RAW; "@localsize" return TOKEN(T_A_LOCALSIZE); "@const" return TOKEN(T_A_CONST); "@buf" return TOKEN(T_A_BUF); @@ -301,6 +302,7 @@ static int parse_reg(const char *str) "quad_shuffle.horiz" return TOKEN(T_OP_QSHUFFLE_H); "quad_shuffle.vert" return TOKEN(T_OP_QSHUFFLE_V); "quad_shuffle.diag" return TOKEN(T_OP_QSHUFFLE_DIAG); +"tcinv" return TOKEN(T_OP_TCINV); /* category 6: */ "ldg" return TOKEN(T_OP_LDG); @@ -380,6 +382,11 @@ static int parse_reg(const char *str) /* category 7: */ "bar" return TOKEN(T_OP_BAR); "fence" return TOKEN(T_OP_FENCE); +"sleep.l" return TOKEN(T_OP_SLEEP); +"icinv" return TOKEN(T_OP_ICINV); +"dccln.all" return TOKEN(T_OP_DCCLN); +"dcinv.all" return TOKEN(T_OP_DCINV); +"dcflu.all" return TOKEN(T_OP_DCFLU); "f16" return TOKEN(T_TYPE_F16); "f32" return TOKEN(T_TYPE_F32); diff --git a/lib/mesa/src/freedreno/ir3/ir3_parser.y b/lib/mesa/src/freedreno/ir3/ir3_parser.y index 36256d870..929989115 100644 --- a/lib/mesa/src/freedreno/ir3/ir3_parser.y +++ b/lib/mesa/src/freedreno/ir3/ir3_parser.y @@ -294,6 +294,7 @@ struct ir3 * ir3_parse(struct ir3_shader_variant *v, int tok; int num; uint32_t unum; + uint64_t u64; double flt; const char *str; struct ir3_register *reg; @@ -541,6 +542,7 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token <tok> T_OP_QSHUFFLE_H %token <tok> T_OP_QSHUFFLE_V %token <tok> T_OP_QSHUFFLE_DIAG +%token <tok> T_OP_TCINV /* category 6: */ %token <tok> T_OP_LDG @@ -619,6 +621,13 @@ static void print_token(FILE *file, int type, YYSTYPE value) /* category 7: */ %token <tok> T_OP_BAR %token <tok> T_OP_FENCE +%token <tok> T_OP_SLEEP +%token <tok> T_OP_ICINV +%token <tok> T_OP_DCCLN 
+%token <tok> T_OP_DCINV +%token <tok> T_OP_DCFLU + +%token <u64> T_RAW /* type qualifiers: */ %token <tok> T_TYPE_F16 @@ -805,6 +814,7 @@ instr: iflags cat0_instr | iflags cat5_instr { fixup_cat5_s2en(); } | iflags cat6_instr | iflags cat7_instr +| raw_instr | label label: T_IDENTIFIER ':' { new_label($1); } @@ -1089,6 +1099,7 @@ cat5_instr: cat5_opc_dsxypp cat5_flags dst_reg ',' src_reg | cat5_opc cat5_flags cat5_type dst_reg ',' cat5_samp | cat5_opc cat5_flags cat5_type dst_reg ',' cat5_tex | cat5_opc cat5_flags cat5_type dst_reg +| T_OP_TCINV { new_instr(OPC_TCINV); } cat6_typed: '.' T_UNTYPED { instr->cat6.typed = 0; } | '.' T_TYPED { instr->cat6.typed = 1; } @@ -1279,7 +1290,16 @@ cat7_scopes: cat7_barrier: T_OP_BAR { new_instr(OPC_BAR); } cat7_scopes | T_OP_FENCE { new_instr(OPC_FENCE); } cat7_scopes +cat7_data_cache: T_OP_DCCLN { new_instr(OPC_DCCLN); } +| T_OP_DCINV { new_instr(OPC_DCINV); } +| T_OP_DCFLU { new_instr(OPC_DCFLU); } + cat7_instr: cat7_barrier +| cat7_data_cache +| T_OP_SLEEP { new_instr(OPC_SLEEP); } +| T_OP_ICINV { new_instr(OPC_ICINV); } + +raw_instr: T_RAW {new_instr(OPC_META_RAW)->raw.value = $1;} src: T_REGISTER { $$ = new_src($1, 0); } | T_A0 { $$ = new_src((61 << 3), IR3_REG_HALF); } diff --git a/lib/mesa/src/freedreno/ir3/tests/disasm.c b/lib/mesa/src/freedreno/ir3/tests/disasm.c index 2f49dbde2..c40302c57 100644 --- a/lib/mesa/src/freedreno/ir3/tests/disasm.c +++ b/lib/mesa/src/freedreno/ir3/tests/disasm.c @@ -177,6 +177,8 @@ static const struct test { /* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */ INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */ + INSTR_6XX(a7000000_00000000, "tcinv"), + /* cat6 */ INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */ @@ -416,6 +418,10 @@ static const struct test { INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"), INSTR_6XX(e09a0000_00000000, "fence.r.w"), INSTR_6XX(f0420000_00000000, "(sy)bar.g"), + INSTR_6XX(e1080000_00000000, "sleep.l"), + INSTR_6XX(e2080000_00000000, "dccln.all"), + + INSTR_6XX(ffffffff_ffffffff, "raw 0xFFFFFFFFFFFFFFFF"), /* clang-format on */ }; @@ -462,6 +468,7 @@ main(int argc, char **argv) &(struct isa_decode_options){ .gpu_id = test->gpu_id, .show_errors = true, + .no_match_cb = print_raw, }); fflush(fdisasm); diff --git a/lib/mesa/src/freedreno/registers/adreno/a7xx.xml b/lib/mesa/src/freedreno/registers/adreno/a7xx.xml deleted file mode 100644 index f258f6467..000000000 --- a/lib/mesa/src/freedreno/registers/adreno/a7xx.xml +++ /dev/null @@ -1,346 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<database xmlns="http://nouveau.freedesktop.org/" -xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" -xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> -<import file="freedreno_copyright.xml"/> -<import file="adreno/adreno_common.xml"/> -<import file="adreno/adreno_pm4.xml"/> - -<domain name="A7XX" width="32"> - <reg32 offset="0x0011" name="RBBM_GBIF_CLIENT_QOS_CNTL"/> - <reg32 offset="0x0016" name="RBBM_GBIF_HALT"/> - <reg32 offset="0x0017" name="RBBM_GBIF_HALT_ACK"/> - <reg32 offset="0x001f" name="RBBM_INTERFACE_HANG_INT_CNTL"/> - <reg32 offset="0x0037" name="RBBM_INT_CLEAR_CMD"/> - <reg32 offset="0x0038" name="RBBM_INT_0_MASK"> - <bitfield name="GPUIDLE" pos="0" type="boolean"/> - <bitfield name="AHBERROR" pos="1" type="boolean"/> - <bitfield name="CPIPCINT0" pos="4" type="boolean"/> - <bitfield 
name="CPIPCINT1" pos="5" type="boolean"/> - <bitfield name="ATBASYNCFIFOOVERFLOW" pos="6" type="boolean"/> - <bitfield name="GPCERROR" pos="7" type="boolean"/> - <bitfield name="SWINTERRUPT" pos="8" type="boolean"/> - <bitfield name="HWERROR" pos="9" type="boolean"/> - <bitfield name="CCU_CLEAN_DEPTH_TS" pos="10" type="boolean"/> - <bitfield name="CCU_CLEAN_COLOR_TS" pos="11" type="boolean"/> - <bitfield name="CCU_RESOLVE_CLEAN_TS" pos="12" type="boolean"/> - <bitfield name="PM4CPINTERRUPT" pos="15" type="boolean"/> - <bitfield name="PM4CPINTERRUPTLPAC" pos="16" type="boolean"/> - <bitfield name="RB_DONE_TS" pos="17" type="boolean"/> - <bitfield name="CACHE_CLEAN_TS" pos="20" type="boolean"/> - <bitfield name="CACHE_CLEAN_TS_LPAC" pos="21" type="boolean"/> - <bitfield name="ATBBUSOVERFLOW" pos="22" type="boolean"/> - <bitfield name="HANGDETECTINTERRUPT" pos="23" type="boolean"/> - <bitfield name="OUTOFBOUNDACCESS" pos="24" type="boolean"/> - <bitfield name="UCHETRAPINTERRUPT" pos="25" type="boolean"/> - <bitfield name="DEBUGBUSINTERRUPT0" pos="26" type="boolean"/> - <bitfield name="DEBUGBUSINTERRUPT1" pos="27" type="boolean"/> - <bitfield name="TSBWRITEERROR" pos="28" type="boolean"/> - <bitfield name="ISDBCPUIRQ" pos="30" type="boolean"/> - <bitfield name="ISDBUNDERDEBUG" pos="31" type="boolean"/> - </reg32> - <reg32 offset="0x003a" name="RBBM_INT_2_MASK"/> - <reg32 offset="0x0042" name="RBBM_SP_HYST_CNT"/> - <reg32 offset="0x0043" name="RBBM_SW_RESET_CMD"/> - <reg32 offset="0x0044" name="RBBM_RAC_THRESHOLD_CNT"/> - <reg32 offset="0x00ae" name="RBBM_CLOCK_CNTL"/> - <reg32 offset="0x00b0" name="RBBM_CLOCK_CNTL_SP0"/> - <reg32 offset="0x00b4" name="RBBM_CLOCK_CNTL2_SP0"/> - <reg32 offset="0x00b8" name="RBBM_CLOCK_DELAY_SP0"/> - <reg32 offset="0x00bc" name="RBBM_CLOCK_HYST_SP0"/> - <reg32 offset="0x00c0" name="RBBM_CLOCK_CNTL_TP0"/> - <reg32 offset="0x00c4" name="RBBM_CLOCK_CNTL2_TP0"/> - <reg32 offset="0x00c8" name="RBBM_CLOCK_CNTL3_TP0"/> - <reg32 offset="0x00cc" name="RBBM_CLOCK_CNTL4_TP0"/> - <reg32 offset="0x00d0" name="RBBM_CLOCK_DELAY_TP0"/> - <reg32 offset="0x00d4" name="RBBM_CLOCK_DELAY2_TP0"/> - <reg32 offset="0x00d8" name="RBBM_CLOCK_DELAY3_TP0"/> - <reg32 offset="0x00dc" name="RBBM_CLOCK_DELAY4_TP0"/> - <reg32 offset="0x00e0" name="RBBM_CLOCK_HYST_TP0"/> - <reg32 offset="0x00e4" name="RBBM_CLOCK_HYST2_TP0"/> - <reg32 offset="0x00e8" name="RBBM_CLOCK_HYST3_TP0"/> - <reg32 offset="0x00ec" name="RBBM_CLOCK_HYST4_TP0"/> - <reg32 offset="0x00f0" name="RBBM_CLOCK_CNTL_RB0"/> - <reg32 offset="0x00f4" name="RBBM_CLOCK_CNTL2_RB0"/> - <reg32 offset="0x00f8" name="RBBM_CLOCK_CNTL_CCU0"/> - <reg32 offset="0x0100" name="RBBM_CLOCK_HYST_RB_CCU0"/> - <reg32 offset="0x0104" name="RBBM_CLOCK_CNTL_RAC"/> - <reg32 offset="0x0105" name="RBBM_CLOCK_CNTL2_RAC"/> - <reg32 offset="0x0106" name="RBBM_CLOCK_DELAY_RAC"/> - <reg32 offset="0x0107" name="RBBM_CLOCK_HYST_RAC"/> - <reg32 offset="0x0108" name="RBBM_CLOCK_CNTL_TSE_RAS_RBBM"/> - <reg32 offset="0x0109" name="RBBM_CLOCK_DELAY_TSE_RAS_RBBM"/> - <reg32 offset="0x010a" name="RBBM_CLOCK_HYST_TSE_RAS_RBBM"/> - <reg32 offset="0x010b" name="RBBM_CLOCK_CNTL_UCHE"/> - <reg32 offset="0x010f" name="RBBM_CLOCK_DELAY_UCHE"/> - <reg32 offset="0x0110" name="RBBM_CLOCK_HYST_UCHE"/> - <reg32 offset="0x0111" name="RBBM_CLOCK_MODE_VFD"/> - <reg32 offset="0x0112" name="RBBM_CLOCK_DELAY_VFD"/> - <reg32 offset="0x0113" name="RBBM_CLOCK_HYST_VFD"/> - <reg32 offset="0x0114" name="RBBM_CLOCK_MODE_GPC"/> - <reg32 offset="0x0115" name="RBBM_CLOCK_DELAY_GPC"/> - <reg32 
offset="0x0116" name="RBBM_CLOCK_HYST_GPC"/> - <reg32 offset="0x0117" name="RBBM_CLOCK_DELAY_HLSQ_2"/> - <reg32 offset="0x0118" name="RBBM_CLOCK_CNTL_GMU_GX"/> - <reg32 offset="0x0119" name="RBBM_CLOCK_DELAY_GMU_GX"/> - <reg32 offset="0x011a" name="RBBM_CLOCK_HYST_GMU_GX"/> - <reg32 offset="0x011b" name="RBBM_CLOCK_MODE_HLSQ"/> - <reg32 offset="0x011c" name="RBBM_CLOCK_DELAY_HLSQ"/> - <reg32 offset="0x011d" name="RBBM_CLOCK_HYST_HLSQ"/> - <reg32 offset="0x0201" name="RBBM_INT_0_STATUS"/> - <reg32 offset="0x0210" name="RBBM_STATUS"> - <bitfield name="CPAHBBUSYCXMASTER" pos="0" type="boolean"/> - <bitfield name="CPAHBBUSYCPMASTER" pos="1" type="boolean"/> - <bitfield name="CPBUSY" pos="2" type="boolean"/> - <bitfield name="GFXDBGCBUSY" pos="3" type="boolean"/> - <bitfield name="VBIFGXFPARTBUSY" pos="4" type="boolean"/> - <bitfield name="TSEBUSY" pos="5" type="boolean"/> - <bitfield name="RASBUSY" pos="6" type="boolean"/> - <bitfield name="RBBUSY" pos="7" type="boolean"/> - <bitfield name="CCUBUSY" pos="8" type="boolean"/> - <bitfield name="A2DBUSY" pos="9" type="boolean"/> - <bitfield name="LRZBUSY" pos="10" type="boolean"/> - <bitfield name="COMDCOMBUSY" pos="11" type="boolean"/> - <bitfield name="PCDCALLBUSY" pos="12" type="boolean"/> - <bitfield name="PCVSDBUSY" pos="13" type="boolean"/> - <bitfield name="TESSBUSY" pos="14" type="boolean"/> - <bitfield name="VFDBUSY" pos="15" type="boolean"/> - <bitfield name="VPCBUSY" pos="16" type="boolean"/> - <bitfield name="UCHEBUSY" pos="17" type="boolean"/> - <bitfield name="SPBUSY" pos="18" type="boolean"/> - <bitfield name="TPL1BUSY" pos="19" type="boolean"/> - <bitfield name="VSCBUSY" pos="20" type="boolean"/> - <bitfield name="HLSQBUSY" pos="21" type="boolean"/> - <bitfield name="GPUBUSYIGNAHBCP" pos="22" type="boolean"/> - <bitfield name="GPUBUSYIGNAHB" pos="23" type="boolean"/> - </reg32> - <reg32 offset="0x0213" name="RBBM_STATUS3"/> - <reg32 offset="0x0260" name="RBBM_CLOCK_MODE_CP"/> - <reg32 offset="0x0284" name="RBBM_CLOCK_MODE_BV_LRZ"/> - <reg32 offset="0x0285" name="RBBM_CLOCK_MODE_BV_GRAS"/> - <reg32 offset="0x0286" name="RBBM_CLOCK_MODE2_GRAS"/> - <reg32 offset="0x0287" name="RBBM_CLOCK_MODE_BV_VFD"/> - <reg32 offset="0x0288" name="RBBM_CLOCK_MODE_BV_GPC"/> - <reg64 offset="0x0300" name="RBBM_PERFCTR_CP" stride="2" length="14"/> - <reg64 offset="0x031c" name="RBBM_PERFCTR_RBBM" stride="2" length="4"/> - <reg64 offset="0x0324" name="RBBM_PERFCTR_PC" stride="2" length="8"/> - <reg64 offset="0x0334" name="RBBM_PERFCTR_VFD" stride="2" length="8"/> - <reg64 offset="0x0344" name="RBBM_PERFCTR_HLSQ" stride="2" length="6"/> - <reg64 offset="0x0350" name="RBBM_PERFCTR_VPC" stride="2" length="6"/> - <reg64 offset="0x035c" name="RBBM_PERFCTR_CCU" stride="2" length="5"/> - <reg64 offset="0x0366" name="RBBM_PERFCTR_TSE" stride="2" length="4"/> - <reg64 offset="0x036e" name="RBBM_PERFCTR_RAS" stride="2" length="4"/> - <reg64 offset="0x0376" name="RBBM_PERFCTR_UCHE" stride="2" length="12"/> - <reg64 offset="0x038e" name="RBBM_PERFCTR_TP" stride="2" length="12"/> - <reg64 offset="0x03a6" name="RBBM_PERFCTR_SP" stride="2" length="24"/> - <reg64 offset="0x03d6" name="RBBM_PERFCTR_RB" stride="2" length="8"/> - <reg64 offset="0x03e6" name="RBBM_PERFCTR_VSC" stride="2" length="2"/> - <reg64 offset="0x03ea" name="RBBM_PERFCTR_LRZ" stride="2" length="4"/> - <reg64 offset="0x03f2" name="RBBM_PERFCTR_CMP" stride="2" length="4"/> - <reg64 offset="0x03fa" name="RBBM_PERFCTR_UFC" stride="2" length="4"/> - <reg64 offset="0x0410" name="RBBM_PERFCTR2_HLSQ" 
stride="2" length="6"/> - <reg64 offset="0x041c" name="RBBM_PERFCTR2_CP" stride="2" length="7"/> - <reg64 offset="0x042a" name="RBBM_PERFCTR2_SP" stride="2" length="12"/> - <reg64 offset="0x0442" name="RBBM_PERFCTR2_TP" stride="2" length="6"/> - <reg64 offset="0x044e" name="RBBM_PERFCTR2_UFC" stride="2" length="2"/> - <reg64 offset="0x0460" name="RBBM_PERFCTR_BV_PC" stride="2" length="8"/> - <reg64 offset="0x0470" name="RBBM_PERFCTR_BV_VFD" stride="2" length="8"/> - <reg64 offset="0x0480" name="RBBM_PERFCTR_BV_VPC" stride="2" length="6"/> - <reg64 offset="0x048c" name="RBBM_PERFCTR_BV_TSE" stride="2" length="4"/> - <reg64 offset="0x0494" name="RBBM_PERFCTR_BV_RAS" stride="2" length="4"/> - <reg64 offset="0x049c" name="RBBM_PERFCTR_BV_LRZ" stride="2" length="4"/> - <reg32 offset="0x0500" name="RBBM_PERFCTR_CNTL"/> - <reg32 offset="0x0507" name="RBBM_PERFCTR_RBBM_SEL" stride="1" length="4"/> - <reg32 offset="0x050b" name="RBBM_PERFCTR_GPU_BUSY_MASKED"/> - <reg32 offset="0x0533" name="RBBM_ISDB_CNT"/> - <reg32 offset="0x0534" name="RBBM_NC_MODE_CNTL"/> - <reg32 offset="0x0535" name="RBBM_SNAPSHOT_STATUS"/> - <reg64 offset="0x0800" name="CP_RB_BASE"/> - <reg32 offset="0x0802" name="CP_RB_CNTL"/> - <reg64 offset="0x0804" name="CP_RB_RPTR_ADDR"/> - <reg32 offset="0x0806" name="CP_RB_RPTR"/> - <reg32 offset="0x0807" name="CP_RB_WPTR"/> - <reg32 offset="0x0808" name="CP_SQE_CNTL"/> - <reg32 offset="0x0812" name="CP_CP2GMU_STATUS"/> - <reg32 offset="0x0821" name="CP_HW_FAULT"/> - <reg32 offset="0x0823" name="CP_INTERRUPT_STATUS"> - <bitfield name="OPCODEERROR" pos="0" type="boolean"/> - <bitfield name="UCODEERROR" pos="1" type="boolean"/> - <bitfield name="CPHWFAULT" pos="2" type="boolean"/> - <bitfield name="REGISTERPROTECTION" pos="4" type="boolean"/> - <bitfield name="VSDPARITYERROR" pos="6" type="boolean"/> - <bitfield name="ILLEGALINSTRUCTION" pos="7" type="boolean"/> - <bitfield name="OPCODEERRORLPAC" pos="8" type="boolean"/> - <bitfield name="UCODEERRORLPAC" pos="9" type="boolean"/> - <bitfield name="CPHWFAULTLPAC" pos="10" type="boolean"/> - <bitfield name="REGISTERPROTECTIONLPAC" pos="11" type="boolean"/> - <bitfield name="ILLEGALINSTRUCTIONLPAC" pos="12" type="boolean"/> - <bitfield name="OPCODEERRORBV" pos="13" type="boolean"/> - <bitfield name="UCODEERRORBV" pos="14" type="boolean"/> - <bitfield name="CPHWFAULTBV" pos="15" type="boolean"/> - <bitfield name="REGISTERPROTECTIONBV" pos="16" type="boolean"/> - <bitfield name="ILLEGALINSTRUCTIONBV" pos="17" type="boolean"/> - </reg32> - <reg32 offset="0x0824" name="CP_PROTECT_STATUS"/> - <reg32 offset="0x0825" name="CP_STATUS_1"/> - <reg64 offset="0x0830" name="CP_SQE_INSTR_BASE"/> - <reg32 offset="0x0840" name="CP_MISC_CNTL"/> - <reg32 offset="0x0841" name="CP_CHICKEN_DBG"/> - <reg32 offset="0x0843" name="CP_DBG_ECO_CNTL"/> - <reg32 offset="0x0844" name="CP_APRIV_CNTL"/> - <reg32 offset="0x084f" name="CP_PROTECT_CNTL"/> - <reg32 offset="0x0850" name="CP_PROTECT_REG" stride="1" length="48"/> - <reg32 offset="0x08a0" name="CP_CONTEXT_SWITCH_CNTL"/> - <reg64 offset="0x08a1" name="CP_CONTEXT_SWITCH_SMMU_INFO"/> - <reg64 offset="0x08a3" name="CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR"/> - <reg64 offset="0x08a5" name="CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR"/> - <reg64 offset="0x08a7" name="CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR"/> - <reg32 offset="0x08ab" name="CP_CONTEXT_SWITCH_LEVEL_STATUS"/> - <reg32 offset="0x08d0" name="CP_PERFCTR_CP_SEL" stride="1" length="14"/> - <reg32 offset="0x08e0" name="CP_BV_PERFCTR_CP_SEL" stride="1" 
length="7"/> - <reg64 offset="0x0900" name="CP_CRASH_SCRIPT_BASE"/> - <reg32 offset="0x0902" name="CP_CRASH_DUMP_CNTL"/> - <reg32 offset="0x0903" name="CP_CRASH_DUMP_STATUS"/> - <reg32 offset="0x0908" name="CP_SQE_STAT_ADDR"/> - <reg32 offset="0x0909" name="CP_SQE_STAT_DATA"/> - <reg32 offset="0x090a" name="CP_DRAW_STATE_ADDR"/> - <reg32 offset="0x090b" name="CP_DRAW_STATE_DATA"/> - <reg32 offset="0x090c" name="CP_ROQ_DBG_ADDR"/> - <reg32 offset="0x090d" name="CP_ROQ_DBG_DATA"/> - <reg32 offset="0x090e" name="CP_MEM_POOL_DBG_ADDR"/> - <reg32 offset="0x090f" name="CP_MEM_POOL_DBG_DATA"/> - <reg32 offset="0x0910" name="CP_SQE_UCODE_DBG_ADDR"/> - <reg32 offset="0x0911" name="CP_SQE_UCODE_DBG_DATA"/> - <reg64 offset="0x0928" name="CP_IB1_BASE"/> - <reg32 offset="0x092a" name="CP_IB1_REM_SIZE"/> - <reg64 offset="0x092b" name="CP_IB2_BASE"/> - <reg32 offset="0x092d" name="CP_IB2_REM_SIZE"/> - <reg64 offset="0x0980" name="CP_ALWAYS_ON_COUNTER"/> - <reg32 offset="0x098d" name="CP_AHB_CNTL"/> - <reg32 offset="0x0a00" name="CP_APERTURE_CNTL_HOST"/> - <reg32 offset="0x0a03" name="CP_APERTURE_CNTL_CD"/> - <reg32 offset="0x0a61" name="CP_BV_PROTECT_STATUS"/> - <reg32 offset="0x0a64" name="CP_BV_HW_FAULT"/> - <reg32 offset="0x0a81" name="CP_BV_DRAW_STATE_ADDR"/> - <reg32 offset="0x0a82" name="CP_BV_DRAW_STATE_DATA"/> - <reg32 offset="0x0a83" name="CP_BV_ROQ_DBG_ADDR"/> - <reg32 offset="0x0a84" name="CP_BV_ROQ_DBG_DATA"/> - <reg32 offset="0x0a85" name="CP_BV_SQE_UCODE_DBG_ADDR"/> - <reg32 offset="0x0a86" name="CP_BV_SQE_UCODE_DBG_DATA"/> - <reg32 offset="0x0a87" name="CP_BV_SQE_STAT_ADDR"/> - <reg32 offset="0x0a88" name="CP_BV_SQE_STAT_DATA"/> - <reg32 offset="0x0a96" name="CP_BV_MEM_POOL_DBG_ADDR"/> - <reg32 offset="0x0a97" name="CP_BV_MEM_POOL_DBG_DATA"/> - <reg64 offset="0x0a98" name="CP_BV_RB_RPTR_ADDR"/> - <reg32 offset="0x0a9a" name="CP_RESOURCE_TBL_DBG_ADDR"/> - <reg32 offset="0x0a9b" name="CP_RESOURCE_TBL_DBG_DATA"/> - <reg32 offset="0x0ad0" name="CP_BV_APRIV_CNTL"/> - <reg32 offset="0x0ada" name="CP_BV_CHICKEN_DBG"/> - <reg32 offset="0x0b0a" name="CP_LPAC_DRAW_STATE_ADDR"/> - <reg32 offset="0x0b0b" name="CP_LPAC_DRAW_STATE_DATA"/> - <reg32 offset="0x0b0c" name="CP_LPAC_ROQ_DBG_ADDR"/> - <reg32 offset="0x0b27" name="CP_SQE_AC_UCODE_DBG_ADDR"/> - <reg32 offset="0x0b28" name="CP_SQE_AC_UCODE_DBG_DATA"/> - <reg32 offset="0x0b29" name="CP_SQE_AC_STAT_ADDR"/> - <reg32 offset="0x0b2a" name="CP_SQE_AC_STAT_DATA"/> - <reg32 offset="0x0b31" name="CP_LPAC_APRIV_CNTL"/> - <reg32 offset="0x0b35" name="CP_LPAC_ROQ_DBG_DATA"/> - <reg32 offset="0x0b36" name="CP_LPAC_FIFO_DBG_DATA"/> - <reg32 offset="0x0b40" name="CP_LPAC_FIFO_DBG_ADDR"/> - <reg32 offset="0x0cd8" name="VSC_PERFCTR_VSC_SEL" stride="1" length="2"/> - <reg32 offset="0x0e01" name="UCHE_MODE_CNTL"/> - <reg64 offset="0x0e07" name="UCHE_WRITE_THRU_BASE"/> - <reg64 offset="0x0e09" name="UCHE_TRAP_BASE"/> - <reg64 offset="0x0e0b" name="UCHE_GMEM_RANGE_MIN"/> - <reg64 offset="0x0e0d" name="UCHE_GMEM_RANGE_MAX"/> - <reg32 offset="0x0e17" name="UCHE_CACHE_WAYS"/> - <reg32 offset="0x0e19" name="UCHE_CLIENT_PF"/> - <reg32 offset="0x0e1c" name="UCHE_PERFCTR_UCHE_SEL" stride="1" length="12"/> - <reg32 offset="0x0e3a" name="UCHE_GBIF_GX_CONFIG"/> - <reg32 offset="0x0e3c" name="UCHE_CMDQ_CONFIG"/> - <reg32 offset="0x1140" name="PDC_GPU_ENABLE_PDC"/> - <reg32 offset="0x1148" name="PDC_GPU_SEQ_START_ADDR"/> - <reg32 offset="0x3081" name="VBIF_XIN_HALT_CTRL1"/> - <reg32 offset="0x3084" name="VBIF_TEST_BUS_OUT_CTRL"/> - <reg32 offset="0x3085" 
name="VBIF_TEST_BUS1_CTRL0"/> - <reg32 offset="0x3086" name="VBIF_TEST_BUS1_CTRL1"/> - <reg32 offset="0x3087" name="VBIF_TEST_BUS2_CTRL0"/> - <reg32 offset="0x3088" name="VBIF_TEST_BUS2_CTRL1"/> - <reg32 offset="0x308c" name="VBIF_TEST_BUS_OUT"/> - <reg32 offset="0x30d0" name="VBIF_PERF_CNT_SEL0"/> - <reg32 offset="0x30d1" name="VBIF_PERF_CNT_SEL1"/> - <reg32 offset="0x30d2" name="VBIF_PERF_CNT_SEL2"/> - <reg32 offset="0x30d3" name="VBIF_PERF_CNT_SEL3"/> - <reg32 offset="0x30d8" name="VBIF_PERF_CNT_LOW0"/> - <reg32 offset="0x30d9" name="VBIF_PERF_CNT_LOW1"/> - <reg32 offset="0x30da" name="VBIF_PERF_CNT_LOW2"/> - <reg32 offset="0x30db" name="VBIF_PERF_CNT_LOW3"/> - <reg32 offset="0x30e0" name="VBIF_PERF_CNT_HIGH0"/> - <reg32 offset="0x30e1" name="VBIF_PERF_CNT_HIGH1"/> - <reg32 offset="0x30e2" name="VBIF_PERF_CNT_HIGH2"/> - <reg32 offset="0x30e3" name="VBIF_PERF_CNT_HIGH3"/> - <reg32 offset="0x3100" name="VBIF_PERF_PWR_CNT_EN0"/> - <reg32 offset="0x3101" name="VBIF_PERF_PWR_CNT_EN1"/> - <reg32 offset="0x3102" name="VBIF_PERF_PWR_CNT_EN2"/> - <reg32 offset="0x3110" name="VBIF_PERF_PWR_CNT_LOW0"/> - <reg32 offset="0x3111" name="VBIF_PERF_PWR_CNT_LOW1"/> - <reg32 offset="0x3112" name="VBIF_PERF_PWR_CNT_LOW2"/> - <reg32 offset="0x3118" name="VBIF_PERF_PWR_CNT_HIGH0"/> - <reg32 offset="0x3119" name="VBIF_PERF_PWR_CNT_HIGH1"/> - <reg32 offset="0x311a" name="VBIF_PERF_PWR_CNT_HIGH2"/> - <reg32 offset="0x3c01" name="GBIF_SCACHE_CNTL0"/> - <reg32 offset="0x3c02" name="GBIF_SCACHE_CNTL1"/> - <reg32 offset="0x3c03" name="GBIF_QSB_SIDE0"/> - <reg32 offset="0x3c04" name="GBIF_QSB_SIDE1"/> - <reg32 offset="0x3c05" name="GBIF_QSB_SIDE2"/> - <reg32 offset="0x3c06" name="GBIF_QSB_SIDE3"/> - <reg32 offset="0x3c45" name="GBIF_HALT"/> - <reg32 offset="0x3c46" name="GBIF_HALT_ACK"/> - <reg32 offset="0x3cc0" name="GBIF_PERF_PWR_CNT_EN"/> - <reg32 offset="0x3cc1" name="GBIF_PERF_PWR_CNT_CLR"/> - <reg32 offset="0x3cc2" name="GBIF_PERF_CNT_SEL"/> - <reg32 offset="0x3cc3" name="GBIF_PERF_PWR_CNT_SEL"/> - <reg32 offset="0x3cc4" name="GBIF_PERF_CNT_LOW0"/> - <reg32 offset="0x3cc5" name="GBIF_PERF_CNT_LOW1"/> - <reg32 offset="0x3cc6" name="GBIF_PERF_CNT_LOW2"/> - <reg32 offset="0x3cc7" name="GBIF_PERF_CNT_LOW3"/> - <reg32 offset="0x3cc8" name="GBIF_PERF_CNT_HIGH0"/> - <reg32 offset="0x3cc9" name="GBIF_PERF_CNT_HIGH1"/> - <reg32 offset="0x3cca" name="GBIF_PERF_CNT_HIGH2"/> - <reg32 offset="0x3ccb" name="GBIF_PERF_CNT_HIGH3"/> - <reg32 offset="0x3ccc" name="GBIF_PWR_CNT_LOW0"/> - <reg32 offset="0x3ccd" name="GBIF_PWR_CNT_LOW1"/> - <reg32 offset="0x3cce" name="GBIF_PWR_CNT_LOW2"/> - <reg32 offset="0x3ccf" name="GBIF_PWR_CNT_HIGH0"/> - <reg32 offset="0x3cd0" name="GBIF_PWR_CNT_HIGH1"/> - <reg32 offset="0x3cd1" name="GBIF_PWR_CNT_HIGH2"/> - <reg32 offset="0x8602" name="GRAS_NC_MODE_CNTL"/> - <reg32 offset="0x8610" name="GRAS_PERFCTR_TSE_SEL" stride="1" length="4"/> - <reg32 offset="0x8614" name="GRAS_PERFCTR_RAS_SEL" stride="1" length="4"/> - <reg32 offset="0x8618" name="GRAS_PERFCTR_LRZ_SEL" stride="1" length="4"/> - <reg32 offset="0x8e08" name="RB_NC_MODE_CNTL"/> - <reg32 offset="0x8e10" name="RB_PERFCTR_RB_SEL" stride="1" length="8"/> - <reg32 offset="0x8e18" name="RB_PERFCTR_CCU_SEL" stride="1" length="5"/> - <reg32 offset="0x8e2c" name="RB_PERFCTR_CMP_SEL" stride="1" length="4"/> - <reg32 offset="0x8e30" name="RB_PERFCTR_UFC_SEL" stride="1" length="6"/> - <reg32 offset="0x8e3b" name="RB_RB_SUB_BLOCK_SEL_CNTL_HOST"/> - <reg32 offset="0x8e3d" name="RB_RB_SUB_BLOCK_SEL_CNTL_CD"/> - <reg32 offset="0x8e50" 
name="RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE"/> - <reg32 offset="0x960b" name="VPC_PERFCTR_VPC_SEL" stride="1" length="12"/> - <reg32 offset="0x9e42" name="PC_PERFCTR_PC_SEL" stride="1" length="16"/> - <reg32 offset="0xa610" name="VFD_PERFCTR_VFD_SEL" stride="1" length="16"/> - <reg32 offset="0xae02" name="SP_NC_MODE_CNTL"/> - <reg32 offset="0xae60" name="SP_PERFCTR_HLSQ_SEL" stride="1" length="6"/> - <reg32 offset="0xae6d" name="SP_READ_SEL"/> - <reg32 offset="0xae80" name="SP_PERFCTR_SP_SEL" stride="1" length="36"/> - <reg32 offset="0xb604" name="TPL1_NC_MODE_CNTL"/> - <reg32 offset="0xb610" name="TPL1_PERFCTR_TP_SEL" stride="1" length="18"/> - <reg32 offset="0xc000" name="SP_AHB_READ_APERTURE"/> - <reg32 offset="0xf400" name="RBBM_SECVID_TRUST_CNTL"/> - <reg64 offset="0xf800" name="RBBM_SECVID_TSB_TRUSTED_BASE"/> - <reg32 offset="0xf802" name="RBBM_SECVID_TSB_TRUSTED_SIZE"/> - <reg32 offset="0xf803" name="RBBM_SECVID_TSB_CNTL"/> - <reg64 offset="0xfc00" name="RBBM_SECVID_TSB_STATUS"/> -</domain> - -</database> diff --git a/lib/mesa/src/freedreno/vulkan/tu_android.c b/lib/mesa/src/freedreno/vulkan/tu_android.c deleted file mode 100644 index 1ebc9e726..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_android.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright © 2017, Google Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include <hardware/gralloc.h> -#include <hardware/hardware.h> -#include <hardware/hwvulkan.h> -#include <libsync.h> - -#include <vulkan/vk_android_native_buffer.h> -#include <vulkan/vk_icd.h> - -static int -tu_hal_open(const struct hw_module_t *mod, - const char *id, - struct hw_device_t **dev); -static int -tu_hal_close(struct hw_device_t *dev); - -static void UNUSED -static_asserts(void) -{ - STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC); -} - -PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = { - .common = - { - .tag = HARDWARE_MODULE_TAG, - .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1, - .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0), - .id = HWVULKAN_HARDWARE_MODULE_ID, - .name = "AMD Vulkan HAL", - .author = "Google", - .methods = - &(hw_module_methods_t){ - .open = tu_hal_open, - }, - }, -}; - -/* If any bits in test_mask are set, then unset them and return true. 
*/ -static inline bool -unmask32(uint32_t *inout_mask, uint32_t test_mask) -{ - uint32_t orig_mask = *inout_mask; - *inout_mask &= ~test_mask; - return *inout_mask != orig_mask; -} - -static int -tu_hal_open(const struct hw_module_t *mod, - const char *id, - struct hw_device_t **dev) -{ - assert(mod == &HAL_MODULE_INFO_SYM.common); - assert(strcmp(id, HWVULKAN_DEVICE_0) == 0); - - hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev)); - if (!hal_dev) - return -1; - - *hal_dev = (hwvulkan_device_t){ - .common = - { - .tag = HARDWARE_DEVICE_TAG, - .version = HWVULKAN_DEVICE_API_VERSION_0_1, - .module = &HAL_MODULE_INFO_SYM.common, - .close = tu_hal_close, - }, - .EnumerateInstanceExtensionProperties = - tu_EnumerateInstanceExtensionProperties, - .CreateInstance = tu_CreateInstance, - .GetInstanceProcAddr = tu_GetInstanceProcAddr, - }; - - *dev = &hal_dev->common; - return 0; -} - -static int -tu_hal_close(struct hw_device_t *dev) -{ - /* hwvulkan.h claims that hw_device_t::close() is never called. */ - return -1; -} - -VkResult -tu_image_from_gralloc(VkDevice device_h, - const VkImageCreateInfo *base_info, - const VkNativeBufferANDROID *gralloc_info, - const VkAllocationCallbacks *alloc, - VkImage *out_image_h) - -{ - TU_FROM_HANDLE(tu_device, device, device_h); - VkImage image_h = VK_NULL_HANDLE; - struct tu_image *image = NULL; - struct tu_bo *bo = NULL; - VkResult result; - - result = tu_image_create( - device_h, - &(struct tu_image_create_info) { - .vk_info = base_info, .scanout = true, .no_metadata_planes = true }, - alloc, &image_h); - - if (result != VK_SUCCESS) - return result; - - if (gralloc_info->handle->numFds != 1) { - return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE, - "VkNativeBufferANDROID::handle::numFds is %d, " - "expected 1", - gralloc_info->handle->numFds); - } - - /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf - * must exceed that of the gralloc handle, and we do not own the gralloc - * handle. - */ - int dma_buf = gralloc_info->handle->data[0]; - - image = tu_image_from_handle(image_h); - - VkDeviceMemory memory_h; - - const VkMemoryDedicatedAllocateInfo ded_alloc = { - .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - .pNext = NULL, - .buffer = VK_NULL_HANDLE, - .image = image_h - }; - - const VkImportMemoryFdInfo import_info = { - .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO, - .pNext = &ded_alloc, - .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, - .fd = dup(dma_buf), - }; - /* Find the first VRAM memory type, or GART for PRIME images. */ - int memory_type_index = -1; - for (int i = 0; - i < device->physical_device->memory_properties.memoryTypeCount; ++i) { - bool is_local = - !!(device->physical_device->memory_properties.memoryTypes[i] - .propertyFlags & - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (is_local) { - memory_type_index = i; - break; - } - } - - /* fallback */ - if (memory_type_index == -1) - memory_type_index = 0; - - result = - tu_AllocateMemory(device_h, - &(VkMemoryAllocateInfo) { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = &import_info, - .allocationSize = image->size, - .memoryTypeIndex = memory_type_index, - }, - alloc, &memory_h); - if (result != VK_SUCCESS) - goto fail_create_image; - - tu_BindImageMemory(device_h, image_h, memory_h, 0); - - image->owned_memory = memory_h; - /* Don't clobber the out-parameter until success is certain. 
*/ - *out_image_h = image_h; - - return VK_SUCCESS; - -fail_create_image: -fail_size: - tu_DestroyImage(device_h, image_h, alloc); - - return result; -} - -VkResult -tu_GetSwapchainGrallocUsageANDROID(VkDevice device_h, - VkFormat format, - VkImageUsageFlags imageUsage, - int *grallocUsage) -{ - TU_FROM_HANDLE(tu_device, device, device_h); - struct tu_physical_device *phys_dev = device->physical_device; - VkPhysicalDevice phys_dev_h = tu_physical_device_to_handle(phys_dev); - VkResult result; - - *grallocUsage = 0; - - /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags - * returned to applications via - * VkSurfaceCapabilitiesKHR::supportedUsageFlags. - * The relevant code in libvulkan/swapchain.cpp contains this fun comment: - * - * TODO(jessehall): I think these are right, but haven't thought hard - * about it. Do we need to query the driver for support of any of - * these? - * - * Any disagreement between this function and the hardcoded - * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests - * dEQP-VK.wsi.android.swapchain.*.image_usage to fail. - */ - - const VkPhysicalDeviceImageFormatInfo2 image_format_info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .format = format, - .type = VK_IMAGE_TYPE_2D, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = imageUsage, - }; - - VkImageFormatProperties2 image_format_props = { - .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, - }; - - /* Check that requested format and usage are supported. */ - result = tu_GetPhysicalDeviceImageFormatProperties2( - phys_dev_h, &image_format_info, &image_format_props); - if (result != VK_SUCCESS) { - return vk_errorf(device->instance, result, - "tu_GetPhysicalDeviceImageFormatProperties2 failed " - "inside %s", - __func__); - } - - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) - *grallocUsage |= GRALLOC_USAGE_HW_RENDER; - - if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) - *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE; - - /* All VkImageUsageFlags not explicitly checked here are unsupported for - * gralloc swapchains. - */ - if (imageUsage != 0) { - return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, - "unsupported VkImageUsageFlags(0x%x) for gralloc " - "swapchain", - imageUsage); - } - - /* - * FINISHME: Advertise all display-supported formats. Mostly - * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check - * what we need for 30-bit colors. - */ - if (format == VK_FORMAT_B8G8R8A8_UNORM || - format == VK_FORMAT_B5G6R5_UNORM_PACK16) { - *grallocUsage |= GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | - GRALLOC_USAGE_EXTERNAL_DISP; - } - - if (*grallocUsage == 0) - return VK_ERROR_FORMAT_NOT_SUPPORTED; - - return VK_SUCCESS; -} - -VkResult -tu_AcquireImageANDROID(VkDevice device, - VkImage image_h, - int nativeFenceFd, - VkSemaphore semaphore, - VkFence fence) -{ - VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS; - - if (semaphore != VK_NULL_HANDLE) { - int semaphore_fd = - nativeFenceFd >= 0 ? 
dup(nativeFenceFd) : nativeFenceFd; - semaphore_result = tu_ImportSemaphoreFdKHR( - device, &(VkImportSemaphoreFdInfoKHR) { - .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR, - .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT, - .fd = semaphore_fd, - .semaphore = semaphore, - }); - } - - if (fence != VK_NULL_HANDLE) { - int fence_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd; - fence_result = tu_ImportFenceFdKHR( - device, &(VkImportFenceFdInfoKHR) { - .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR, - .flags = VK_FENCE_IMPORT_TEMPORARY_BIT, - .fd = fence_fd, - .fence = fence, - }); - } - - close(nativeFenceFd); - - if (semaphore_result != VK_SUCCESS) - return semaphore_result; - return fence_result; -} - -VkResult -tu_QueueSignalReleaseImageANDROID(VkQueue _queue, - uint32_t waitSemaphoreCount, - const VkSemaphore *pWaitSemaphores, - VkImage image, - int *pNativeFenceFd) -{ - TU_FROM_HANDLE(tu_queue, queue, _queue); - VkResult result = VK_SUCCESS; - - if (waitSemaphoreCount == 0) { - if (pNativeFenceFd) - *pNativeFenceFd = -1; - return VK_SUCCESS; - } - - int fd = -1; - - for (uint32_t i = 0; i < waitSemaphoreCount; ++i) { - int tmp_fd; - result = tu_GetSemaphoreFdKHR( - tu_device_to_handle(queue->device), - &(VkSemaphoreGetFdInfoKHR) { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT, - .semaphore = pWaitSemaphores[i], - }, - &tmp_fd); - if (result != VK_SUCCESS) { - if (fd >= 0) - close(fd); - return result; - } - - if (fd < 0) - fd = tmp_fd; - else if (tmp_fd >= 0) { - sync_accumulate("tu", &fd, tmp_fd); - close(tmp_fd); - } - } - - if (pNativeFenceFd) { - *pNativeFenceFd = fd; - } else if (fd >= 0) { - close(fd); - /* We still need to do the exports, to reset the semaphores, but - * otherwise we don't wait on them. */ - } - return VK_SUCCESS; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_autotune.c b/lib/mesa/src/freedreno/vulkan/tu_autotune.c deleted file mode 100644 index 4476e50b7..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_autotune.c +++ /dev/null @@ -1,637 +0,0 @@ -/* - * Copyright © 2021 Igalia S.L. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include <vulkan/vulkan_core.h> - -#include "tu_autotune.h" -#include "tu_private.h" -#include "tu_cs.h" - -/* How does it work? - * - * - For each renderpass we calculate the number of samples passed - * by storing the number before and after in GPU memory. 
- * - To store the values each command buffer holds GPU memory which - * expands with more renderpasses being written. - * - For each renderpass we create a tu_renderpass_result entry which - * points to the results in GPU memory. - * - Later on tu_renderpass_result would be added to the - * tu_renderpass_history entry which aggregates results for a - * given renderpass. - * - On submission: - * - Process results whose fence was signalled. - * - Free per-submission data which we no longer need. - * - * - Create a command stream to write a fence value. This way we would - * know when we could safely read the results. - * - We cannot rely on the command buffer's lifetime when referencing - * its resources since the buffer could be destroyed before we process - * the results. - * - For each command buffer: - * - Reference its GPU memory. - * - Move if ONE_TIME_SUBMIT or copy all tu_renderpass_result to the queue. - * - * Since the command buffers could be recorded on different threads - * we have to maintain some amount of locking around the history table, - * however we change the table only in a single thread at the submission - * time, so in most cases there will be no locking. - */ - -void -tu_autotune_free_results_locked(struct tu_device *dev, struct list_head *results); - -#define TU_AUTOTUNE_DEBUG_LOG 0 -/* Dump history entries on autotuner finish, - * could be used to gather data from traces. - */ -#define TU_AUTOTUNE_LOG_AT_FINISH 0 - -/* How many of the most recent renderpass stats are taken into account. */ -#define MAX_HISTORY_RESULTS 5 -/* For how many submissions we store renderpass stats. */ -#define MAX_HISTORY_LIFETIME 128 - - -/** - * Tracks results for a given renderpass key - */ -struct tu_renderpass_history { - uint64_t key; - - /* Fence of the last use, so we can delete old history entries */ - uint32_t last_fence; - - /** - * List of recent fd_renderpass_result's - */ - struct list_head results; - uint32_t num_results; - - uint32_t avg_samples; -}; - -/* Holds per-submission cs which writes the fence.
*/ -struct tu_submission_data { - struct list_head node; - uint32_t fence; - - struct tu_cs fence_cs; - uint32_t buffers_count; -}; - -static struct tu_submission_data * -create_submission_data(struct tu_device *dev, struct tu_autotune *at) -{ - struct tu_submission_data *submission_data = - calloc(1, sizeof(struct tu_submission_data)); - submission_data->fence = at->fence_counter; - - struct tu_cs* fence_cs = &submission_data->fence_cs; - tu_cs_init(fence_cs, dev, TU_CS_MODE_GROW, 5); - tu_cs_begin(fence_cs); - - tu_cs_emit_pkt7(fence_cs, CP_EVENT_WRITE, 4); - tu_cs_emit(fence_cs, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS)); - tu_cs_emit_qw(fence_cs, dev->global_bo->iova + gb_offset(autotune_fence)); - tu_cs_emit(fence_cs, at->fence_counter); - - tu_cs_end(fence_cs); - - list_addtail(&submission_data->node, &at->pending_submission_data); - - return submission_data; -} - -static void -free_submission_data(struct tu_submission_data *data) -{ - list_del(&data->node); - tu_cs_finish(&data->fence_cs); - - free(data); -} - -#define APPEND_TO_HASH(state, field) \ - XXH64_update(state, &field, sizeof(field)); - -static uint64_t -hash_renderpass_instance(const struct tu_render_pass *pass, - const struct tu_framebuffer *framebuffer, - const struct tu_cmd_buffer *cmd) { - XXH64_state_t hash_state; - XXH64_reset(&hash_state, 0); - - APPEND_TO_HASH(&hash_state, framebuffer->width); - APPEND_TO_HASH(&hash_state, framebuffer->height); - APPEND_TO_HASH(&hash_state, framebuffer->layers); - - APPEND_TO_HASH(&hash_state, pass->attachment_count); - XXH64_update(&hash_state, pass->attachments, pass->attachment_count * sizeof(pass->attachments[0])); - - for (unsigned i = 0; i < pass->attachment_count; i++) { - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->view.width); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->view.height); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->image->vk_format); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->image->layer_count); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->image->level_count); - } - - APPEND_TO_HASH(&hash_state, pass->subpass_count); - for (unsigned i = 0; i < pass->subpass_count; i++) { - APPEND_TO_HASH(&hash_state, pass->subpasses[i].samples); - APPEND_TO_HASH(&hash_state, pass->subpasses[i].input_count); - APPEND_TO_HASH(&hash_state, pass->subpasses[i].color_count); - APPEND_TO_HASH(&hash_state, pass->subpasses[i].resolve_count); - } - - return XXH64_digest(&hash_state); -} - -static void -free_result(struct tu_device *dev, struct tu_renderpass_result *result) -{ - tu_suballoc_bo_free(&dev->autotune_suballoc, &result->bo); - list_del(&result->node); - free(result); -} - -static void -free_history(struct tu_device *dev, struct tu_renderpass_history *history) -{ - tu_autotune_free_results_locked(dev, &history->results); - free(history); -} - -static bool -get_history(struct tu_autotune *at, uint64_t rp_key, uint32_t *avg_samples) -{ - bool has_history = false; - - /* If lock contention were found in the wild - - * we could use try_lock here.
- */ - u_rwlock_rdlock(&at->ht_lock); - struct hash_entry *entry = - _mesa_hash_table_search(at->ht, &rp_key); - if (entry) { - struct tu_renderpass_history *history = entry->data; - if (history->num_results > 0) { - *avg_samples = p_atomic_read(&history->avg_samples); - has_history = true; - } - } - u_rwlock_rdunlock(&at->ht_lock); - - return has_history; -} - -static struct tu_renderpass_result * -create_history_result(struct tu_autotune *at, uint64_t rp_key) -{ - struct tu_renderpass_result *result = calloc(1, sizeof(*result)); - result->rp_key = rp_key; - - return result; -} - -static void -history_add_result(struct tu_device *dev, struct tu_renderpass_history *history, - struct tu_renderpass_result *result) -{ - list_delinit(&result->node); - list_add(&result->node, &history->results); - - if (history->num_results < MAX_HISTORY_RESULTS) { - history->num_results++; - } else { - /* Once above the limit, start popping old results off the - * tail of the list: - */ - struct tu_renderpass_result *old_result = - list_last_entry(&history->results, struct tu_renderpass_result, node); - mtx_lock(&dev->autotune_mutex); - free_result(dev, old_result); - mtx_unlock(&dev->autotune_mutex); - } - - /* Do calculations here to avoid locking history in tu_autotune_use_bypass */ - uint32_t total_samples = 0; - list_for_each_entry(struct tu_renderpass_result, result, - &history->results, node) { - total_samples += result->samples_passed; - } - - float avg_samples = (float)total_samples / (float)history->num_results; - p_atomic_set(&history->avg_samples, (uint32_t)avg_samples); -} - -static void -process_results(struct tu_autotune *at) -{ - struct tu_device *dev = at->device; - struct tu6_global *global = dev->global_bo->map; - uint32_t current_fence = global->autotune_fence; - - list_for_each_entry_safe(struct tu_renderpass_result, result, - &at->pending_results, node) { - if (result->fence > current_fence) - break; - - struct tu_renderpass_history *history = result->history; - result->samples_passed = - result->samples->samples_end - result->samples->samples_start; - - history_add_result(dev, history, result); - } - - list_for_each_entry_safe(struct tu_submission_data, submission_data, - &at->pending_submission_data, node) { - if (submission_data->fence > current_fence) - break; - - free_submission_data(submission_data); - } -} - -static void -queue_pending_results(struct tu_autotune *at, struct tu_cmd_buffer *cmdbuf) -{ - bool one_time_submit = cmdbuf->usage_flags & - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - - if (one_time_submit) { - /* We can just steal the list since it won't be resubmitted again */ - list_splicetail(&cmdbuf->renderpass_autotune_results, - &at->pending_results); - list_inithead(&cmdbuf->renderpass_autotune_results); - } else { - list_for_each_entry_safe(struct tu_renderpass_result, result, - &cmdbuf->renderpass_autotune_results, node) { - /* TODO: copying each result isn't nice */ - struct tu_renderpass_result *copy = malloc(sizeof(*result)); - *copy = *result; - tu_bo_get_ref(copy->bo.bo); - list_addtail(©->node, &at->pending_results); - } - } -} - -struct tu_cs * -tu_autotune_on_submit(struct tu_device *dev, - struct tu_autotune *at, - struct tu_cmd_buffer **cmd_buffers, - uint32_t cmd_buffer_count) -{ - /* We are single-threaded here */ - - process_results(at); - - /* pre-increment so zero isn't valid fence */ - uint32_t new_fence = ++at->fence_counter; - uint32_t result_buffers = 0; - - /* Create history entries here to minimize work and locking being - * done on 
renderpass end. - */ - for (uint32_t i = 0; i < cmd_buffer_count; i++) { - struct tu_cmd_buffer *cmdbuf = cmd_buffers[i]; - list_for_each_entry_safe(struct tu_renderpass_result, result, - &cmdbuf->renderpass_autotune_results, node) { - struct tu_renderpass_history *history; - struct hash_entry *entry = - _mesa_hash_table_search(at->ht, &result->rp_key); - if (!entry) { - history = calloc(1, sizeof(*history)); - history->key = result->rp_key; - list_inithead(&history->results); - - u_rwlock_wrlock(&at->ht_lock); - _mesa_hash_table_insert(at->ht, &history->key, history); - u_rwlock_wrunlock(&at->ht_lock); - } else { - history = (struct tu_renderpass_history *) entry->data; - } - - history->last_fence = new_fence; - - result->fence = new_fence; - result->history = history; - } - - if (!list_is_empty(&cmdbuf->renderpass_autotune_results)) { - result_buffers++; - } - } - - struct tu_submission_data *submission_data = - create_submission_data(dev, at); - submission_data->buffers_count = result_buffers; - - for (uint32_t i = 0; i < cmd_buffer_count; i++) { - struct tu_cmd_buffer *cmdbuf = cmd_buffers[i]; - if (list_is_empty(&cmdbuf->renderpass_autotune_results)) - continue; - - queue_pending_results(at, cmdbuf); - } - -#if TU_AUTOTUNE_DEBUG_LOG != 0 - mesa_logi("Total history entries: %u", at->ht->entries); -#endif - - /* Cleanup old entries from history table. The assumption - * here is that application doesn't hold many old unsubmitted - * command buffers, otherwise this table may grow big. - */ - hash_table_foreach(at->ht, entry) { - struct tu_renderpass_history *history = entry->data; - if (history->last_fence == 0 || - (new_fence - history->last_fence) <= MAX_HISTORY_LIFETIME) - continue; - -#if TU_AUTOTUNE_DEBUG_LOG != 0 - mesa_logi("Removed old history entry %016"PRIx64"", history->key); -#endif - - u_rwlock_wrlock(&at->ht_lock); - _mesa_hash_table_remove_key(at->ht, &history->key); - u_rwlock_wrunlock(&at->ht_lock); - - mtx_lock(&dev->autotune_mutex); - free_history(dev, history); - mtx_unlock(&dev->autotune_mutex); - } - - return &submission_data->fence_cs; -} - -static bool -renderpass_key_equals(const void *_a, const void *_b) -{ - return *(uint64_t *)_a == *(uint64_t *)_b; -} - -static uint32_t -renderpass_key_hash(const void *_a) -{ - return *((uint64_t *) _a) & 0xffffffff; -} - -VkResult -tu_autotune_init(struct tu_autotune *at, struct tu_device *dev) -{ - at->enabled = true; - at->device = dev; - at->ht = _mesa_hash_table_create(NULL, - renderpass_key_hash, - renderpass_key_equals); - u_rwlock_init(&at->ht_lock); - - list_inithead(&at->pending_results); - list_inithead(&at->pending_submission_data); - - return VK_SUCCESS; -} - -void -tu_autotune_fini(struct tu_autotune *at, struct tu_device *dev) -{ -#if TU_AUTOTUNE_LOG_AT_FINISH != 0 - while (!list_is_empty(&at->pending_results)) { - process_results(at); - } - - hash_table_foreach(at->ht, entry) { - struct tu_renderpass_history *history = entry->data; - - mesa_logi("%016"PRIx64" \tavg_passed=%u results=%u", - history->key, history->avg_samples, history->num_results); - } -#endif - - tu_autotune_free_results(dev, &at->pending_results); - - mtx_lock(&dev->autotune_mutex); - hash_table_foreach(at->ht, entry) { - struct tu_renderpass_history *history = entry->data; - free_history(dev, history); - } - mtx_unlock(&dev->autotune_mutex); - - list_for_each_entry_safe(struct tu_submission_data, submission_data, - &at->pending_submission_data, node) { - free_submission_data(submission_data); - } - - _mesa_hash_table_destroy(at->ht, NULL); - 
u_rwlock_destroy(&at->ht_lock); -} - -bool -tu_autotune_submit_requires_fence(struct tu_cmd_buffer **cmd_buffers, - uint32_t cmd_buffer_count) -{ - for (uint32_t i = 0; i < cmd_buffer_count; i++) { - struct tu_cmd_buffer *cmdbuf = cmd_buffers[i]; - if (!list_is_empty(&cmdbuf->renderpass_autotune_results)) - return true; - } - - return false; -} - -void -tu_autotune_free_results_locked(struct tu_device *dev, struct list_head *results) -{ - list_for_each_entry_safe(struct tu_renderpass_result, result, - results, node) { - free_result(dev, result); - } -} - -void -tu_autotune_free_results(struct tu_device *dev, struct list_head *results) -{ - mtx_lock(&dev->autotune_mutex); - tu_autotune_free_results_locked(dev, results); - mtx_unlock(&dev->autotune_mutex); -} - -static bool -fallback_use_bypass(const struct tu_render_pass *pass, - const struct tu_framebuffer *framebuffer, - const struct tu_cmd_buffer *cmd_buffer) -{ - if (cmd_buffer->state.drawcall_count > 5) - return false; - - for (unsigned i = 0; i < pass->subpass_count; i++) { - if (pass->subpasses[i].samples != VK_SAMPLE_COUNT_1_BIT) - return false; - } - - return true; -} - -bool -tu_autotune_use_bypass(struct tu_autotune *at, - struct tu_cmd_buffer *cmd_buffer, - struct tu_renderpass_result **autotune_result) -{ - const struct tu_render_pass *pass = cmd_buffer->state.pass; - const struct tu_framebuffer *framebuffer = cmd_buffer->state.framebuffer; - - for (unsigned i = 0; i < pass->subpass_count; i++) { - const struct tu_subpass *subpass = &pass->subpasses[i]; - /* GMEM works much faster in this case */ - if (subpass->raster_order_attachment_access) - return false; - - /* Would be very slow in sysmem mode because we have to enable - * SINGLE_PRIM_MODE(FLUSH_PER_OVERLAP_AND_OVERWRITE) - */ - if (subpass->feedback_loop_color || subpass->feedback_loop_ds) - return false; - } - - /* For VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT buffers - * we would have to allocate GPU memory at the submit time and copy - * results into it. - * Native games usually don't use it, Zink and DXVK don't use it, - * D3D12 doesn't have such a concept. - */ - bool simultaneous_use = - cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; - - if (!at->enabled || simultaneous_use) - return fallback_use_bypass(pass, framebuffer, cmd_buffer); - - /* We use a 64bit hash as a key since we don't fear rare hash collisions, - * the worst that would happen is sysmem being selected when it should - * not have been, and with 64bit it would be extremely rare. - * - * Q: Why not make the key from framebuffer + renderpass pointers? - * A: At least DXVK creates new framebuffers each frame while keeping - * renderpasses the same. Also we want to support replaying a single - * frame in a loop for testing. - */ - uint64_t renderpass_key = hash_renderpass_instance(pass, framebuffer, cmd_buffer); - - *autotune_result = create_history_result(at, renderpass_key); - - uint32_t avg_samples = 0; - if (get_history(at, renderpass_key, &avg_samples)) { - /* TODO we should account for load/stores/clears/resolves especially - * with low drawcall count and ~fb_size samples passed, in D3D11 games - * we are seeing many renderpasses like: - * - color attachment load - * - single fullscreen draw - * - color attachment store - */ - - /* Low sample count could mean there was only a clear..
or there was - * a clear plus draws that touch no or few samples - */ - if (avg_samples < 500) { -#if TU_AUTOTUNE_DEBUG_LOG != 0 - mesa_logi("%016"PRIx64":%u\t avg_samples=%u selecting sysmem", - renderpass_key, cmd_buffer->state.drawcall_count, avg_samples); -#endif - return true; - } - - /* Cost-per-sample is an estimate for the average number of reads+ - * writes for a given passed sample. - */ - float sample_cost = cmd_buffer->state.total_drawcalls_cost; - sample_cost /= cmd_buffer->state.drawcall_count; - - float single_draw_cost = (avg_samples * sample_cost) / cmd_buffer->state.drawcall_count; - - bool select_sysmem = single_draw_cost < 6000.0; - -#if TU_AUTOTUNE_DEBUG_LOG != 0 - mesa_logi("%016"PRIx64":%u\t avg_samples=%u, " - "sample_cost=%f, single_draw_cost=%f selecting %s", - renderpass_key, cmd_buffer->state.drawcall_count, avg_samples, - sample_cost, single_draw_cost, select_sysmem ? "sysmem" : "gmem"); -#endif - - return select_sysmem; - } - - return fallback_use_bypass(pass, framebuffer, cmd_buffer); -} - -void -tu_autotune_begin_renderpass(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - struct tu_renderpass_result *autotune_result) -{ - if (!autotune_result) - return; - - struct tu_device *dev = cmd->device; - - static const uint32_t size = sizeof(struct tu_renderpass_samples); - - mtx_lock(&dev->autotune_mutex); - VkResult ret = tu_suballoc_bo_alloc(&autotune_result->bo, &dev->autotune_suballoc, size, size); - mtx_unlock(&dev->autotune_mutex); - if (ret != VK_SUCCESS) { - autotune_result->bo.iova = 0; - return; - } - - uint64_t result_iova = autotune_result->bo.iova; - - autotune_result->samples = tu_suballoc_bo_map(&autotune_result->bo); - - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.copy = true)); - - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_ADDR(.qword = result_iova)); - - tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1); - tu_cs_emit(cs, ZPASS_DONE); -} - -void tu_autotune_end_renderpass(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - struct tu_renderpass_result *autotune_result) -{ - if (!autotune_result) - return; - - if (!autotune_result->bo.iova) - return; - - uint64_t result_iova = autotune_result->bo.iova + - offsetof(struct tu_renderpass_samples, samples_end); - - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.copy = true)); - - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_ADDR(.qword = result_iova)); - - tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1); - tu_cs_emit(cs, ZPASS_DONE); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c b/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c deleted file mode 100644 index ae462d867..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_clear_blit.c +++ /dev/null @@ -1,3421 +0,0 @@ -/* - * Copyright 2019-2020 Valve Corporation - * SPDX-License-Identifier: MIT - * - * Authors: - * Jonathan Marek <jonathan@marek.ca> - */ - -#include "tu_clear_blit.h" - -#include "ir3/ir3_nir.h" - -#include "util/format_r11g11b10f.h" -#include "util/format_rgb9e5.h" -#include "util/format_srgb.h" -#include "util/half_float.h" -#include "compiler/nir/nir_builder.h" - -#include "tu_cmd_buffer.h" -#include "tu_cs.h" -#include "tu_formats.h" -#include "tu_image.h" -#include "tu_tracepoints.h" - -static uint32_t -tu_pack_float32_for_unorm(float val, int bits) -{ - return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1)); -} - -/* r2d_ = BLIT_OP_SCALE operations */ - -static enum a6xx_2d_ifmt -format_to_ifmt(enum pipe_format format) -{ - if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - format == PIPE_FORMAT_Z24X8_UNORM) - return 
R2D_UNORM8; - - /* get_component_bits doesn't work with depth/stencil formats: */ - if (format == PIPE_FORMAT_Z16_UNORM || format == PIPE_FORMAT_Z32_FLOAT) - return R2D_FLOAT32; - if (format == PIPE_FORMAT_S8_UINT) - return R2D_INT8; - if (format == PIPE_FORMAT_A8_UNORM) - return R2D_UNORM8; - - /* use the size of the red channel to find the corresponding "ifmt" */ - bool is_int = util_format_is_pure_integer(format); - switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { - case 4: case 5: case 8: - return is_int ? R2D_INT8 : R2D_UNORM8; - case 10: case 11: - return is_int ? R2D_INT16 : R2D_FLOAT16; - case 16: - if (util_format_is_float(format)) - return R2D_FLOAT16; - return is_int ? R2D_INT16 : R2D_FLOAT32; - case 32: - return is_int ? R2D_INT32 : R2D_FLOAT32; - default: - unreachable("bad format"); - return 0; - } -} - -static void -r2d_coords(struct tu_cs *cs, - const VkOffset2D *dst, - const VkOffset2D *src, - const VkExtent2D *extent) -{ - tu_cs_emit_regs(cs, - A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y), - A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1)); - - if (!src) - return; - - tu_cs_emit_regs(cs, - A6XX_GRAS_2D_SRC_TL_X(src->x), - A6XX_GRAS_2D_SRC_BR_X(src->x + extent->width - 1), - A6XX_GRAS_2D_SRC_TL_Y(src->y), - A6XX_GRAS_2D_SRC_BR_Y(src->y + extent->height - 1)); -} - -static void -r2d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val) -{ - uint32_t clear_value[4] = {}; - - switch (format) { - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - case PIPE_FORMAT_Z24X8_UNORM: - /* cleared as r8g8b8a8_unorm using special format */ - clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); - clear_value[1] = clear_value[0] >> 8; - clear_value[2] = clear_value[0] >> 16; - clear_value[3] = val->depthStencil.stencil; - break; - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z32_FLOAT: - /* R2D_FLOAT32 */ - clear_value[0] = fui(val->depthStencil.depth); - break; - case PIPE_FORMAT_S8_UINT: - clear_value[0] = val->depthStencil.stencil; - break; - case PIPE_FORMAT_R9G9B9E5_FLOAT: - /* cleared as UINT32 */ - clear_value[0] = float3_to_rgb9e5(val->color.float32); - break; - default: - assert(!util_format_is_depth_or_stencil(format)); - const struct util_format_description *desc = util_format_description(format); - enum a6xx_2d_ifmt ifmt = format_to_ifmt(format); - - assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || - format == PIPE_FORMAT_R11G11B10_FLOAT); - - for (unsigned i = 0; i < desc->nr_channels; i++) { - const struct util_format_channel_description *ch = &desc->channel[i]; - if (ifmt == R2D_UNORM8) { - float linear = val->color.float32[i]; - if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3) - linear = util_format_linear_to_srgb_float(val->color.float32[i]); - - if (ch->type == UTIL_FORMAT_TYPE_SIGNED) - clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f); - else - clear_value[i] = tu_pack_float32_for_unorm(linear, 8); - } else if (ifmt == R2D_FLOAT16) { - clear_value[i] = _mesa_float_to_half(val->color.float32[i]); - } else { - assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 || - ifmt == R2D_INT16 || ifmt == R2D_INT8); - clear_value[i] = val->color.uint32[i]; - } - } - break; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); - tu_cs_emit_array(cs, clear_value, 4); -} - -static void -fixup_src_format(enum pipe_format *src_format, enum pipe_format dst_format, - enum a6xx_format *fmt) -{ - /* When blitting S8 -> D24S8 or vice 
versa, we have to override S8, which - * is normally R8_UINT for sampling/blitting purposes, to a unorm format. - * We also have to move stencil, which is normally in the .w channel, into - * the right channel. Reinterpreting the S8 texture as A8_UNORM solves both - * problems, and avoids using a swap, which seems to sometimes not work - * with a D24S8 source, or a texture swizzle which is only supported with - * the 3d path. Sometimes this blit happens on already-constructed - * fdl6_view's, e.g. for sysmem resolves, so this has to happen as a fixup. - */ - if (*src_format == PIPE_FORMAT_S8_UINT && - (dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)) { - *fmt = FMT6_A8_UNORM; - *src_format = PIPE_FORMAT_A8_UNORM; - } -} - -static void -fixup_dst_format(enum pipe_format src_format, enum pipe_format *dst_format, - enum a6xx_format *fmt) -{ - if (*dst_format == PIPE_FORMAT_S8_UINT && - (src_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - src_format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)) { - *dst_format = PIPE_FORMAT_A8_UNORM; - *fmt = FMT6_A8_UNORM; - } -} - -static void -r2d_src(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct fdl6_view *iview, - uint32_t layer, - VkFilter filter, - enum pipe_format dst_format) -{ - uint32_t src_info = iview->SP_PS_2D_SRC_INFO; - if (filter != VK_FILTER_NEAREST) - src_info |= A6XX_SP_PS_2D_SRC_INFO_FILTER; - - enum a6xx_format fmt = (src_info & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK); - enum pipe_format src_format = iview->format; - fixup_src_format(&src_format, dst_format, &fmt); - - src_info = - (src_info & ~A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK) | - A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); - tu_cs_emit(cs, src_info); - tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE); - tu_cs_image_ref_2d(cs, iview, layer, true); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3); - tu_cs_image_flag_ref(cs, iview, layer); -} - -static void -r2d_src_depth(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t layer, - VkFilter filter) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); - tu_cs_emit(cs, tu_image_view_depth(iview, SP_PS_2D_SRC_INFO)); - tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE); - tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer); - /* SP_PS_2D_SRC_PITCH has shifted pitch field */ - tu_cs_emit(cs, iview->depth_PITCH << 9); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3); - tu_cs_image_flag_ref(cs, &iview->view, layer); -} - -static void -r2d_src_stencil(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t layer, - VkFilter filter) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); - tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS); - tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE); - tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); - /* SP_PS_2D_SRC_PITCH has shifted pitch field */ - tu_cs_emit(cs, iview->stencil_PITCH << 9); -} - -static void -r2d_src_buffer(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum pipe_format format, - uint64_t va, uint32_t pitch, - uint32_t width, uint32_t height, - enum pipe_format dst_format) -{ - struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR); - enum a6xx_format color_format = fmt.fmt; - fixup_src_format(&format, dst_format, &color_format); -
tu_cs_emit_regs(cs, - A6XX_SP_PS_2D_SRC_INFO( - .color_format = color_format, - .color_swap = fmt.swap, - .srgb = util_format_is_srgb(format), - .unk20 = 1, - .unk22 = 1), - A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height), - A6XX_SP_PS_2D_SRC(.qword = va), - A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch)); -} - -static void -r2d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, - enum pipe_format src_format) -{ - uint32_t dst_info = iview->RB_2D_DST_INFO; - enum a6xx_format fmt = dst_info & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; - enum pipe_format dst_format = iview->format; - fixup_dst_format(src_format, &dst_format, &fmt); - - dst_info = - (dst_info & ~A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK) | fmt; - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); - tu_cs_emit(cs, dst_info); - tu_cs_image_ref_2d(cs, iview, layer, false); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3); - tu_cs_image_flag_ref(cs, iview, layer); -} - -static void -r2d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); - tu_cs_emit(cs, tu_image_view_depth(iview, RB_2D_DST_INFO)); - tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer); - tu_cs_emit(cs, iview->depth_PITCH); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3); - tu_cs_image_flag_ref(cs, &iview->view, layer); -} - -static void -r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); - tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS); - tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); - tu_cs_emit(cs, iview->stencil_PITCH); -} - -static void -r2d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch, - enum pipe_format src_format) -{ - struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR); - enum a6xx_format color_fmt = fmt.fmt; - fixup_dst_format(src_format, &format, &color_fmt); - fmt.fmt = color_fmt; - - tu_cs_emit_regs(cs, - A6XX_RB_2D_DST_INFO( - .color_format = fmt.fmt, - .color_swap = fmt.swap, - .srgb = util_format_is_srgb(format)), - A6XX_RB_2D_DST(.qword = va), - A6XX_RB_2D_DST_PITCH(pitch)); -} - -static void -r2d_setup_common(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum pipe_format src_format, - enum pipe_format dst_format, - VkImageAspectFlags aspect_mask, - unsigned blit_param, - bool clear, - bool ubwc, - bool scissor) -{ - enum a6xx_format fmt = tu6_base_format(dst_format); - fixup_dst_format(src_format, &dst_format, &fmt); - enum a6xx_2d_ifmt ifmt = format_to_ifmt(dst_format); - - uint32_t unknown_8c01 = 0; - - if ((dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - dst_format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) { - fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; - } - - /* note: the only format with partial clearing is D24S8 */ - if (dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { - /* preserve stencil channel */ - if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) - unknown_8c01 = 0x08000041; - /* preserve depth channels */ - if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) - unknown_8c01 = 0x00084001; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_UNKNOWN_8C01, 1); - tu_cs_emit(cs, unknown_8c01); - - uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL( - .scissor = scissor, - .rotate = blit_param, - .solid_color = clear, - .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, - .color_format = fmt, - .mask = 0xf, - .ifmt = 
util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt, - ).value; - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); - tu_cs_emit(cs, blit_cntl); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); - tu_cs_emit(cs, blit_cntl); - - if (fmt == FMT6_10_10_10_2_UNORM_DEST) - fmt = FMT6_16_16_16_16_FLOAT; - - tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT( - .sint = util_format_is_pure_sint(dst_format), - .uint = util_format_is_pure_uint(dst_format), - .color_format = fmt, - .srgb = util_format_is_srgb(dst_format), - .mask = 0xf)); -} - -static void -r2d_setup(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum pipe_format src_format, - enum pipe_format dst_format, - VkImageAspectFlags aspect_mask, - unsigned blit_param, - bool clear, - bool ubwc, - VkSampleCountFlagBits samples) -{ - assert(samples == VK_SAMPLE_COUNT_1_BIT); - - if (!cmd->state.pass) { - tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); - } - - r2d_setup_common(cmd, cs, src_format, dst_format, aspect_mask, blit_param, clear, ubwc, false); -} - -static void -r2d_teardown(struct tu_cmd_buffer *cmd, - struct tu_cs *cs) -{ - /* nothing to do here */ -} - -static void -r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu_cs_emit_pkt7(cs, CP_BLIT, 1); - tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); -} - -/* r3d_ = shader path operations */ - -static nir_ssa_def * -load_const(nir_builder *b, unsigned base, unsigned components) -{ - return nir_load_uniform(b, components, 32, nir_imm_int(b, 0), - .base = base); -} - -static nir_shader * -build_blit_vs_shader(void) -{ - nir_builder _b = - nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "blit vs"); - nir_builder *b = &_b; - b->shader->info.internal = true; - - nir_variable *out_pos = - nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), - "gl_Position"); - out_pos->data.location = VARYING_SLOT_POS; - - nir_ssa_def *vert0_pos = load_const(b, 0, 2); - nir_ssa_def *vert1_pos = load_const(b, 4, 2); - nir_ssa_def *vertex = nir_load_vertex_id(b); - - nir_ssa_def *pos = nir_bcsel(b, nir_i2b1(b, vertex), vert1_pos, vert0_pos); - pos = nir_vec4(b, nir_channel(b, pos, 0), - nir_channel(b, pos, 1), - nir_imm_float(b, 0.0), - nir_imm_float(b, 1.0)); - - nir_store_var(b, out_pos, pos, 0xf); - - nir_variable *out_coords = - nir_variable_create(b->shader, nir_var_shader_out, glsl_vec_type(3), - "coords"); - out_coords->data.location = VARYING_SLOT_VAR0; - - nir_ssa_def *vert0_coords = load_const(b, 2, 2); - nir_ssa_def *vert1_coords = load_const(b, 6, 2); - - /* Only used with "z scale" blit path which uses a 3d texture */ - nir_ssa_def *z_coord = load_const(b, 8, 1); - - nir_ssa_def *coords = nir_bcsel(b, nir_i2b1(b, vertex), vert1_coords, vert0_coords); - coords = nir_vec3(b, nir_channel(b, coords, 0), nir_channel(b, coords, 1), - z_coord); - - nir_store_var(b, out_coords, coords, 0x7); - - return b->shader; -} - -static nir_shader * -build_clear_vs_shader(void) -{ - nir_builder _b = - nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "blit vs"); - nir_builder *b = &_b; - b->shader->info.internal = true; - - nir_variable *out_pos = - nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), - "gl_Position"); - out_pos->data.location = VARYING_SLOT_POS; - - nir_ssa_def *vert0_pos = load_const(b, 0, 2); - nir_ssa_def *vert1_pos = load_const(b, 4, 2); - /* c0.z is used to clear depth */ - nir_ssa_def *depth = load_const(b, 2, 1); - nir_ssa_def *vertex = nir_load_vertex_id(b); - - nir_ssa_def *pos = nir_bcsel(b, nir_i2b1(b, vertex), vert1_pos, 
vert0_pos); - pos = nir_vec4(b, nir_channel(b, pos, 0), - nir_channel(b, pos, 1), - depth, nir_imm_float(b, 1.0)); - - nir_store_var(b, out_pos, pos, 0xf); - - nir_variable *out_layer = - nir_variable_create(b->shader, nir_var_shader_out, glsl_uint_type(), - "gl_Layer"); - out_layer->data.location = VARYING_SLOT_LAYER; - nir_ssa_def *layer = load_const(b, 3, 1); - nir_store_var(b, out_layer, layer, 1); - - return b->shader; -} - -static nir_shader * -build_blit_fs_shader(bool zscale) -{ - nir_builder _b = - nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, - zscale ? "zscale blit fs" : "blit fs"); - nir_builder *b = &_b; - b->shader->info.internal = true; - - nir_variable *out_color = - nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), - "color0"); - out_color->data.location = FRAG_RESULT_DATA0; - - unsigned coord_components = zscale ? 3 : 2; - nir_variable *in_coords = - nir_variable_create(b->shader, nir_var_shader_in, - glsl_vec_type(coord_components), - "coords"); - in_coords->data.location = VARYING_SLOT_VAR0; - - nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); - /* Note: since we're just copying data, we rely on the HW ignoring the - * dest_type. - */ - tex->dest_type = nir_type_int32; - tex->is_array = false; - tex->is_shadow = false; - tex->sampler_dim = zscale ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; - - tex->texture_index = 0; - tex->sampler_index = 0; - - b->shader->info.num_textures = 1; - BITSET_SET(b->shader->info.textures_used, 0); - - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(nir_load_var(b, in_coords)); - tex->coord_components = coord_components; - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); - nir_builder_instr_insert(b, &tex->instr); - - nir_store_var(b, out_color, &tex->dest.ssa, 0xf); - - return b->shader; -} - -/* We can only read multisample textures via txf_ms, so we need a separate - * variant for them. - */ -static nir_shader * -build_ms_copy_fs_shader(void) -{ - nir_builder _b = - nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, - "multisample copy fs"); - nir_builder *b = &_b; - b->shader->info.internal = true; - - nir_variable *out_color = - nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), - "color0"); - out_color->data.location = FRAG_RESULT_DATA0; - - nir_variable *in_coords = - nir_variable_create(b->shader, nir_var_shader_in, - glsl_vec_type(2), - "coords"); - in_coords->data.location = VARYING_SLOT_VAR0; - - nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2); - - tex->op = nir_texop_txf_ms; - - /* Note: since we're just copying data, we rely on the HW ignoring the - * dest_type. 
- */ - tex->dest_type = nir_type_int32; - tex->is_array = false; - tex->is_shadow = false; - tex->sampler_dim = GLSL_SAMPLER_DIM_MS; - - tex->texture_index = 0; - tex->sampler_index = 0; - - b->shader->info.num_textures = 1; - BITSET_SET(b->shader->info.textures_used, 0); - BITSET_SET(b->shader->info.textures_used_by_txf, 0); - - nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, in_coords)); - - tex->src[0].src_type = nir_tex_src_coord; - tex->src[0].src = nir_src_for_ssa(coord); - tex->coord_components = 2; - - tex->src[1].src_type = nir_tex_src_ms_index; - tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b)); - - nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); - nir_builder_instr_insert(b, &tex->instr); - - nir_store_var(b, out_color, &tex->dest.ssa, 0xf); - - return b->shader; -} - -static nir_shader * -build_clear_fs_shader(unsigned mrts) -{ - nir_builder _b = - nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, - "mrt%u clear fs", mrts); - nir_builder *b = &_b; - b->shader->info.internal = true; - - for (unsigned i = 0; i < mrts; i++) { - nir_variable *out_color = - nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), - "color"); - out_color->data.location = FRAG_RESULT_DATA0 + i; - - nir_ssa_def *color = load_const(b, 4 * i, 4); - nir_store_var(b, out_color, color, 0xf); - } - - return b->shader; -} - -static void -compile_shader(struct tu_device *dev, struct nir_shader *nir, - unsigned consts, unsigned *offset, enum global_shader idx) -{ - nir->options = ir3_get_compiler_options(dev->compiler); - - nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage); - nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage); - - ir3_finalize_nir(dev->compiler, nir); - - struct ir3_shader *sh = - ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) { - .api_wavesize = IR3_SINGLE_OR_DOUBLE, - .real_wavesize = IR3_SINGLE_OR_DOUBLE, - .reserved_user_consts = align(consts, 4), - }, NULL); - - struct ir3_shader_key key = {}; - bool created; - struct ir3_shader_variant *so = - ir3_shader_get_variant(sh, &key, false, false, &created); - - struct tu6_global *global = dev->global_bo->map; - - assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders)); - dev->global_shaders[idx] = sh; - dev->global_shader_variants[idx] = so; - memcpy(&global->shaders[*offset], so->bin, - sizeof(uint32_t) * so->info.sizedwords); - dev->global_shader_va[idx] = dev->global_bo->iova + - gb_offset(shaders[*offset]); - *offset += align(so->info.sizedwords, 32); -} - -void -tu_init_clear_blit_shaders(struct tu_device *dev) -{ - unsigned offset = 0; - compile_shader(dev, build_blit_vs_shader(), 3, &offset, GLOBAL_SH_VS_BLIT); - compile_shader(dev, build_clear_vs_shader(), 2, &offset, GLOBAL_SH_VS_CLEAR); - compile_shader(dev, build_blit_fs_shader(false), 0, &offset, GLOBAL_SH_FS_BLIT); - compile_shader(dev, build_blit_fs_shader(true), 0, &offset, GLOBAL_SH_FS_BLIT_ZSCALE); - compile_shader(dev, build_ms_copy_fs_shader(), 0, &offset, GLOBAL_SH_FS_COPY_MS); - - for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) { - compile_shader(dev, build_clear_fs_shader(num_rts), num_rts, &offset, - GLOBAL_SH_FS_CLEAR0 + num_rts); - } -} - -void -tu_destroy_clear_blit_shaders(struct tu_device *dev) -{ - for (unsigned i = 0; i < GLOBAL_SH_COUNT; i++) { - if (dev->global_shaders[i]) - ir3_shader_destroy(dev->global_shaders[i]); - } -} - -static void -r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, 
- uint32_t rts_mask, bool z_scale, VkSampleCountFlagBits samples) -{ - enum global_shader vs_id = - blit ? GLOBAL_SH_VS_BLIT : GLOBAL_SH_VS_CLEAR; - - struct ir3_shader_variant *vs = cmd->device->global_shader_variants[vs_id]; - uint64_t vs_iova = cmd->device->global_shader_va[vs_id]; - - enum global_shader fs_id = GLOBAL_SH_FS_BLIT; - - if (z_scale) - fs_id = GLOBAL_SH_FS_BLIT_ZSCALE; - else if (samples != VK_SAMPLE_COUNT_1_BIT) - fs_id = GLOBAL_SH_FS_COPY_MS; - - unsigned num_rts = util_bitcount(rts_mask); - if (!blit) - fs_id = GLOBAL_SH_FS_CLEAR0 + num_rts; - - struct ir3_shader_variant *fs = cmd->device->global_shader_variants[fs_id]; - uint64_t fs_iova = cmd->device->global_shader_va[fs_id]; - - tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( - .vs_state = true, - .hs_state = true, - .ds_state = true, - .gs_state = true, - .fs_state = true, - .cs_state = true, - .gfx_ibo = true, - .cs_ibo = true, - .gfx_shared_const = true, - .gfx_bindless = 0x1f, - .cs_bindless = 0x1f)); - - tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, vs); - tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL); - tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL); - tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL); - tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, fs); - - struct tu_pvtmem_config pvtmem = {}; - tu6_emit_xs(cs, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova); - tu6_emit_xs(cs, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova); - - tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0()); - tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0()); - - if (cmd->device->physical_device->info->a6xx.has_cp_reg_write) { - /* Copy what the blob does here. This will emit an extra 0x3f - * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what - * this is working around yet. - */ - tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); - tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE)); - tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL); - tu_cs_emit(cs, 0); - } else { - tu_cs_emit_regs(cs, A6XX_PC_MULTIVIEW_CNTL()); - } - tu_cs_emit_regs(cs, A6XX_VFD_MULTIVIEW_CNTL()); - - tu6_emit_vpc(cs, vs, NULL, NULL, NULL, fs); - - /* REPL_MODE for varying with RECTLIST (2 vertices only) */ - tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0)); - tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0)); - - tu6_emit_fs_inputs(cs, fs); - - tu_cs_emit_regs(cs, - A6XX_GRAS_CL_CNTL( - .persp_division_disable = 1, - .vp_xform_disable = 1, - .vp_clip_code_ignore = 1, - .clip_disable = 1)); - tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable? 
- - tu_cs_emit_regs(cs, A6XX_PC_RASTER_CNTL()); - tu_cs_emit_regs(cs, A6XX_VPC_UNKNOWN_9107()); - - tu_cs_emit_regs(cs, - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = 0, .y = 0), - A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff)); - tu_cs_emit_regs(cs, - A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = 0, .y = 0), - A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff)); - - tu_cs_emit_regs(cs, - A6XX_VFD_INDEX_OFFSET(), - A6XX_VFD_INSTANCE_START_OFFSET()); - - if (rts_mask) { - unsigned rts_count = util_last_bit(rts_mask); - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), rts_count); - unsigned rt = 0; - for (unsigned i = 0; i < rts_count; i++) { - unsigned regid = 0; - if (rts_mask & (1u << i)) - regid = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + rt++); - tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(regid)); - } - } - - tu6_emit_msaa(cs, samples, false); -} - -static void -r3d_coords_raw(struct tu_cs *cs, const float *coords) -{ - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(2)); - tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - tu_cs_emit_array(cs, (const uint32_t *) coords, 8); -} - -/* z coordinate for "z scale" blit path which uses a 3d texture */ -static void -r3d_coord_z(struct tu_cs *cs, float z) -{ - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 4); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(2) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - tu_cs_emit(cs, fui(z)); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); -} - -static void -r3d_coords(struct tu_cs *cs, - const VkOffset2D *dst, - const VkOffset2D *src, - const VkExtent2D *extent) -{ - int32_t src_x1 = src ? src->x : 0; - int32_t src_y1 = src ? 
src->y : 0; - r3d_coords_raw(cs, (float[]) { - dst->x, dst->y, - src_x1, src_y1, - dst->x + extent->width, dst->y + extent->height, - src_x1 + extent->width, src_y1 + extent->height, - }); -} - -static void -r3d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val) -{ - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - switch (format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: { - /* cleared as r8g8b8a8_unorm using special format */ - uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); - tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f)); - tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f)); - tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f)); - tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f)); - } break; - case PIPE_FORMAT_Z16_UNORM: - case PIPE_FORMAT_Z32_FLOAT: - tu_cs_emit(cs, fui(val->depthStencil.depth)); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - break; - case PIPE_FORMAT_S8_UINT: - tu_cs_emit(cs, val->depthStencil.stencil & 0xff); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - break; - default: - /* as color formats use clear value as-is */ - assert(!util_format_is_depth_or_stencil(format)); - tu_cs_emit_array(cs, val->color.uint32, 4); - break; - } -} - -static void -r3d_src_common(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const uint32_t *tex_const, - uint32_t offset_base, - uint32_t offset_ubwc, - VkFilter filter) -{ - struct tu_cs_memory texture = { }; - VkResult result = tu_cs_alloc(&cmd->sub_cs, - 2, /* allocate space for a sampler too */ - A6XX_TEX_CONST_DWORDS, &texture); - if (result != VK_SUCCESS) { - vk_command_buffer_set_error(&cmd->vk, result); - return; - } - - memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4); - - /* patch addresses for layer offset */ - *(uint64_t*) (texture.map + 4) += offset_base; - uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc; - texture.map[7] = ubwc_addr; - texture.map[8] = ubwc_addr >> 32; - - texture.map[A6XX_TEX_CONST_DWORDS + 0] = - A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter, false)) | - A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter, false)) | - A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) | - A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) | - A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) | - 0x60000; /* XXX used by blob, doesn't seem necessary */ - texture.map[A6XX_TEX_CONST_DWORDS + 1] = - A6XX_TEX_SAMP_1_UNNORM_COORDS | - A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR; - texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0; - texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0; - - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4); - - tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_SAMP(.qword = texture.iova + A6XX_TEX_CONST_DWORDS * 4)); - - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - 
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - tu_cs_emit_qw(cs, texture.iova); - - tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_CONST(.qword = texture.iova)); - tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1)); -} - -static void -r3d_src(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct fdl6_view *iview, - uint32_t layer, - VkFilter filter, - enum pipe_format dst_format) -{ - uint32_t desc[A6XX_TEX_CONST_DWORDS]; - memcpy(desc, iview->descriptor, sizeof(desc)); - - enum a6xx_format fmt = (desc[0] & A6XX_TEX_CONST_0_FMT__MASK) >> - A6XX_TEX_CONST_0_FMT__SHIFT; - enum pipe_format src_format = iview->format; - fixup_src_format(&src_format, dst_format, &fmt); - desc[0] = (desc[0] & ~A6XX_TEX_CONST_0_FMT__MASK) | - A6XX_TEX_CONST_0_FMT(fmt); - - r3d_src_common(cmd, cs, desc, - iview->layer_size * layer, - iview->ubwc_layer_size * layer, - filter); -} - -static void -r3d_src_buffer(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum pipe_format format, - uint64_t va, uint32_t pitch, - uint32_t width, uint32_t height, - enum pipe_format dst_format) -{ - uint32_t desc[A6XX_TEX_CONST_DWORDS]; - - struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR); - enum a6xx_format color_format = fmt.fmt; - fixup_src_format(&format, dst_format, &color_format); - - desc[0] = - COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) | - A6XX_TEX_CONST_0_FMT(color_format) | - A6XX_TEX_CONST_0_SWAP(fmt.swap) | - A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | - A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) | - A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) | - A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W); - desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height); - desc[2] = - A6XX_TEX_CONST_2_PITCH(pitch) | - A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D); - desc[3] = 0; - desc[4] = va; - desc[5] = va >> 32; - for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++) - desc[i] = 0; - - r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); -} - -static void -r3d_src_gmem(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - enum pipe_format format, - enum pipe_format dst_format, - uint32_t gmem_offset, - uint32_t cpp) -{ - uint32_t desc[A6XX_TEX_CONST_DWORDS]; - memcpy(desc, iview->view.descriptor, sizeof(desc)); - - enum a6xx_format fmt = tu6_format_texture(format, TILE6_LINEAR).fmt; - fixup_src_format(&format, dst_format, &fmt); - - /* patch the format so that depth/stencil get the right format and swizzle */ - desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK | - A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK | - A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK); - desc[0] |= A6XX_TEX_CONST_0_FMT(fmt) | - A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | - A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) | - A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) | - A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W); - - /* patched for gmem */ - desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); - desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2); - desc[2] = - A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) | - A6XX_TEX_CONST_2_PITCH(cmd->state.tiling->tile0.width * cpp); - desc[3] = 0; - desc[4] = cmd->device->physical_device->gmem_base + gmem_offset; - desc[5] = A6XX_TEX_CONST_5_DEPTH(1); - for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++) - desc[i] = 0; - - r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); -} - -static void -r3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, - enum pipe_format src_format) -{ - uint32_t 
mrt_buf_info = iview->RB_MRT_BUF_INFO; - - enum a6xx_format fmt = mrt_buf_info & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; - enum pipe_format dst_format = iview->format; - fixup_dst_format(src_format, &dst_format, &fmt); - mrt_buf_info = - (mrt_buf_info & ~A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK) | - A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(fmt); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); - tu_cs_emit(cs, mrt_buf_info); - tu_cs_image_ref(cs, iview, layer); - tu_cs_emit(cs, 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); - tu_cs_image_flag_ref(cs, iview, layer); - - /* Use color format from RB_MRT_BUF_INFO. This register is relevant for - * FMT6_NV12_Y. - */ - tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = fmt)); - - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled)); -} - -static void -r3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); - tu_cs_emit(cs, tu_image_view_depth(iview, RB_MRT_BUF_INFO)); - tu_cs_image_depth_ref(cs, iview, layer); - tu_cs_emit(cs, 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); - tu_cs_image_flag_ref(cs, &iview->view, layer); - - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled)); -} - -static void -r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); - tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO)); - tu_cs_image_stencil_ref(cs, iview, layer); - tu_cs_emit(cs, 0); - - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); -} - -static void -r3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch, - enum pipe_format src_format) -{ - struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR); - - enum a6xx_format color_fmt = fmt.fmt; - fixup_dst_format(src_format, &format, &color_fmt); - - tu_cs_emit_regs(cs, - A6XX_RB_MRT_BUF_INFO(0, .color_format = color_fmt, .color_swap = fmt.swap), - A6XX_RB_MRT_PITCH(0, pitch), - A6XX_RB_MRT_ARRAY_PITCH(0, 0), - A6XX_RB_MRT_BASE(0, .qword = va), - A6XX_RB_MRT_BASE_GMEM(0, 0)); - - tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); -} - -static uint8_t -aspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask) -{ - uint8_t mask = 0xf; - assert(aspect_mask); - /* note: the only format with partial writing is D24S8, - * clear/blit uses the _AS_R8G8B8A8 format to access it - */ - if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { - if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) - mask = 0x7; - if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) - mask = 0x8; - } - return mask; -} - -static void -r3d_setup(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum pipe_format src_format, - enum pipe_format dst_format, - VkImageAspectFlags aspect_mask, - unsigned blit_param, - bool clear, - bool ubwc, - VkSampleCountFlagBits samples) -{ - enum a6xx_format fmt = tu6_base_format(dst_format); - fixup_dst_format(src_format, &dst_format, &fmt); - - if ((dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - dst_format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) { - fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; - } - - if (!cmd->state.pass) { - tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); - tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff); - } - - tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000)); - tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000)); - - r3d_common(cmd, cs, !clear, 1, blit_param, 
samples); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); - tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | - A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) | - 0xfc000000); - tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1)); - - tu_cs_emit_regs(cs, - A6XX_RB_FS_OUTPUT_CNTL0(), - A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1)); - - tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL()); - tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff)); - - tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL()); - tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL()); - tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); - tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL()); - tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK()); - tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK()); - tu_cs_emit_regs(cs, A6XX_RB_STENCILREF()); - - tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf)); - tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf)); - - tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0, - .color_format = fmt, - .color_sint = util_format_is_pure_sint(dst_format), - .color_uint = util_format_is_pure_uint(dst_format))); - - tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, - .component_enable = aspect_write_mask(dst_format, aspect_mask))); - tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(util_format_is_srgb(dst_format))); - tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(util_format_is_srgb(dst_format))); - - tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); - tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); - - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, - A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2)); - - /* Disable sample counting in order to not affect occlusion query. */ - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = true)); - - if (cmd->state.prim_generated_query_running_before_rp) { - tu6_emit_event_write(cmd, cs, STOP_PRIMITIVE_CTRS); - } - - if (cmd->state.predication_active) { - tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1); - tu_cs_emit(cs, 0); - } -} - -static void -r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); - tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) | - CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | - CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY)); - tu_cs_emit(cs, 1); /* instance count */ - tu_cs_emit(cs, 2); /* vertex count */ -} - -static void -r3d_run_vis(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); - tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) | - CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | - CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY)); - tu_cs_emit(cs, 1); /* instance count */ - tu_cs_emit(cs, 2); /* vertex count */ -} - -static void -r3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - if (cmd->state.predication_active) { - tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1); - tu_cs_emit(cs, 1); - } - - /* Re-enable sample counting. 
*/ - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = false)); - - if (cmd->state.prim_generated_query_running_before_rp) { - tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS); - } -} - -/* blit ops - common interface for 2d/shader paths */ - -struct blit_ops { - void (*coords)(struct tu_cs *cs, - const VkOffset2D *dst, - const VkOffset2D *src, - const VkExtent2D *extent); - void (*clear_value)(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val); - void (*src)( - struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct fdl6_view *iview, - uint32_t layer, - VkFilter filter, - enum pipe_format dst_format); - void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - enum pipe_format format, - uint64_t va, uint32_t pitch, - uint32_t width, uint32_t height, - enum pipe_format dst_format); - void (*dst)(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, - enum pipe_format src_format); - void (*dst_depth)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); - void (*dst_stencil)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); - void (*dst_buffer)(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch, - enum pipe_format src_format); - void (*setup)(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum pipe_format src_format, - enum pipe_format dst_format, - VkImageAspectFlags aspect_mask, - unsigned blit_param, /* CmdBlitImage: rotation in 2D path and z scaling in 3D path */ - bool clear, - bool ubwc, - VkSampleCountFlagBits samples); - void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs); - void (*teardown)(struct tu_cmd_buffer *cmd, - struct tu_cs *cs); -}; - -static const struct blit_ops r2d_ops = { - .coords = r2d_coords, - .clear_value = r2d_clear_value, - .src = r2d_src, - .src_buffer = r2d_src_buffer, - .dst = r2d_dst, - .dst_depth = r2d_dst_depth, - .dst_stencil = r2d_dst_stencil, - .dst_buffer = r2d_dst_buffer, - .setup = r2d_setup, - .run = r2d_run, - .teardown = r2d_teardown, -}; - -static const struct blit_ops r3d_ops = { - .coords = r3d_coords, - .clear_value = r3d_clear_value, - .src = r3d_src, - .src_buffer = r3d_src_buffer, - .dst = r3d_dst, - .dst_depth = r3d_dst_depth, - .dst_stencil = r3d_dst_stencil, - .dst_buffer = r3d_dst_buffer, - .setup = r3d_setup, - .run = r3d_run, - .teardown = r3d_teardown, -}; - -/* passthrough set coords from 3D extents */ -static void -coords(const struct blit_ops *ops, - struct tu_cs *cs, - const VkOffset3D *dst, - const VkOffset3D *src, - const VkExtent3D *extent) -{ - ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent); -} - -/* Decides the VK format to treat our data as for a memcpy-style blit. We have - * to be a bit careful because we have to pick a format with matching UBWC - * compression behavior, so no just returning R8_UINT/R16_UINT/R32_UINT for - * everything. - */ -static enum pipe_format -copy_format(VkFormat vk_format, VkImageAspectFlags aspect_mask) -{ - if (vk_format_is_compressed(vk_format)) { - switch (vk_format_get_blocksize(vk_format)) { - case 1: return PIPE_FORMAT_R8_UINT; - case 2: return PIPE_FORMAT_R16_UINT; - case 4: return PIPE_FORMAT_R32_UINT; - case 8: return PIPE_FORMAT_R32G32_UINT; - case 16:return PIPE_FORMAT_R32G32B32A32_UINT; - default: - unreachable("unhandled format size"); - } - } - - enum pipe_format format = tu_vk_format_to_pipe_format(vk_format); - - /* For SNORM formats, copy them as the equivalent UNORM format. 
If we treat - * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81 - * (also -1.0), when we're supposed to be memcpying the bits. See - * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion. - */ - format = util_format_snorm_to_unorm(format); - - switch (format) { - case PIPE_FORMAT_R9G9B9E5_FLOAT: - return PIPE_FORMAT_R32_UINT; - - case PIPE_FORMAT_G8_B8R8_420_UNORM: - if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT) - return PIPE_FORMAT_R8G8_UNORM; - else - return PIPE_FORMAT_Y8_UNORM; - case PIPE_FORMAT_G8_B8_R8_420_UNORM: - return PIPE_FORMAT_R8_UNORM; - - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) - return PIPE_FORMAT_S8_UINT; - assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT); - return PIPE_FORMAT_Z32_FLOAT; - - default: - return format; - } -} - -void -tu6_clear_lrz(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - struct tu_image *image, - const VkClearValue *value) -{ - const struct blit_ops *ops = &r2d_ops; - - /* It is assumed that LRZ cache is invalidated at this point for - * the writes here to become visible to LRZ. - * - * LRZ writes are going through UCHE cache, flush UCHE before changing - * LRZ via CCU. Don't need to invalidate CCU since we are presumably - * writing whole cache lines we assume to be 64 bytes. - */ - tu6_emit_event_write(cmd, &cmd->cs, CACHE_FLUSH_TS); - - ops->setup(cmd, cs, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z16_UNORM, - VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false, - VK_SAMPLE_COUNT_1_BIT); - ops->clear_value(cs, PIPE_FORMAT_Z16_UNORM, value); - ops->dst_buffer(cs, PIPE_FORMAT_Z16_UNORM, - image->iova + image->lrz_offset, - image->lrz_pitch * 2, PIPE_FORMAT_Z16_UNORM); - ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {image->lrz_pitch, image->lrz_height}); - ops->run(cmd, cs); - ops->teardown(cmd, cs); - - /* Clearing writes via CCU color in the PS stage, and LRZ is read via - * UCHE in the earlier GRAS stage. - */ - cmd->state.cache.flush_bits |= - TU_CMD_FLAG_CCU_FLUSH_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE | - TU_CMD_FLAG_WAIT_FOR_IDLE; -} - -void -tu6_dirty_lrz_fc(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - struct tu_image *image) -{ - const struct blit_ops *ops = &r2d_ops; - VkClearValue clear = { .color = { .uint32[0] = 0xffffffff } }; - - /* LRZ fast-clear buffer is always allocated with 512 bytes size. 
*/ - ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT, - VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false, - VK_SAMPLE_COUNT_1_BIT); - ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &clear); - ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT, - image->iova + image->lrz_fc_offset, 512, - PIPE_FORMAT_R32_UINT); - ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {128, 1}); - ops->run(cmd, cs); - ops->teardown(cmd, cs); -} - -static void -tu_image_view_copy_blit(struct fdl6_view *iview, - struct tu_image *image, - enum pipe_format format, - const VkImageSubresourceLayers *subres, - uint32_t layer, - bool z_scale) -{ - VkImageAspectFlags aspect_mask = subres->aspectMask; - - /* always use the AS_R8G8B8A8 format for these */ - if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT || - format == PIPE_FORMAT_Z24X8_UNORM) { - aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT; - } - - const struct fdl_layout *layout = - &image->layout[tu6_plane_index(image->vk.format, aspect_mask)]; - - fdl6_view_init(iview, &layout, &(struct fdl_view_args) { - .iova = image->iova, - .base_array_layer = subres->baseArrayLayer + layer, - .layer_count = 1, - .base_miplevel = subres->mipLevel, - .level_count = 1, - .format = tu_format_for_aspect(format, aspect_mask), - .swiz = { - PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W - }, - .type = z_scale ? FDL_VIEW_TYPE_3D : FDL_VIEW_TYPE_2D, - }, false); -} - -static void -tu_image_view_copy(struct fdl6_view *iview, - struct tu_image *image, - enum pipe_format format, - const VkImageSubresourceLayers *subres, - uint32_t layer) -{ - tu_image_view_copy_blit(iview, image, format, subres, layer, false); -} - -static void -tu_image_view_blit(struct fdl6_view *iview, - struct tu_image *image, - const VkImageSubresourceLayers *subres, - uint32_t layer) -{ - enum pipe_format format = - tu6_plane_format(image->vk.format, tu6_plane_index(image->vk.format, - subres->aspectMask)); - tu_image_view_copy_blit(iview, image, format, subres, layer, false); -} - -static void -tu6_blit_image(struct tu_cmd_buffer *cmd, - struct tu_image *src_image, - struct tu_image *dst_image, - const VkImageBlit2 *info, - VkFilter filter) -{ - const struct blit_ops *ops = &r2d_ops; - struct tu_cs *cs = &cmd->cs; - bool z_scale = false; - uint32_t layers = info->dstOffsets[1].z - info->dstOffsets[0].z; - - /* 2D blit can't do rotation mirroring from just coordinates */ - static const enum a6xx_rotation rotate[2][2] = { - {ROTATE_0, ROTATE_HFLIP}, - {ROTATE_VFLIP, ROTATE_180}, - }; - - bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) != - (info->dstOffsets[1].x < info->dstOffsets[0].x); - bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) != - (info->dstOffsets[1].y < info->dstOffsets[0].y); - - int32_t src0_z = info->srcOffsets[0].z; - int32_t src1_z = info->srcOffsets[1].z; - - if ((info->srcOffsets[1].z - info->srcOffsets[0].z != - info->dstOffsets[1].z - info->dstOffsets[0].z) || - info->srcOffsets[1].z < info->srcOffsets[0].z) { - z_scale = true; - } - - if (info->dstOffsets[1].z < info->dstOffsets[0].z) { - layers = info->dstOffsets[0].z - info->dstOffsets[1].z; - src0_z = info->srcOffsets[1].z; - src1_z = info->srcOffsets[0].z; - } - - if (info->dstSubresource.layerCount > 1) { - assert(layers <= 1); - layers = info->dstSubresource.layerCount; - } - - /* BC1_RGB_* formats need to have their last components overriden with 1 - * when sampling, which is normally handled with the texture descriptor - * swizzle. The 2d path can't handle that, so use the 3d path. 
- * - * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with - * the 2d path. - */ - - unsigned blit_param = rotate[mirror_y][mirror_x]; - if (dst_image->layout[0].nr_samples > 1 || - src_image->vk.format == VK_FORMAT_BC1_RGB_UNORM_BLOCK || - src_image->vk.format == VK_FORMAT_BC1_RGB_SRGB_BLOCK || - filter == VK_FILTER_CUBIC_EXT || - z_scale) { - ops = &r3d_ops; - blit_param = z_scale; - } - - /* use the right format in setup() for D32_S8 - * TODO: this probably should use a helper - */ - enum pipe_format src_format = - tu6_plane_format(src_image->vk.format, - tu6_plane_index(src_image->vk.format, - info->srcSubresource.aspectMask)); - enum pipe_format dst_format = - tu6_plane_format(dst_image->vk.format, - tu6_plane_index(src_image->vk.format, - info->srcSubresource.aspectMask)); - trace_start_blit(&cmd->trace, cs); - - ops->setup(cmd, cs, src_format, dst_format, info->dstSubresource.aspectMask, - blit_param, false, dst_image->layout[0].ubwc, - dst_image->layout[0].nr_samples); - - if (ops == &r3d_ops) { - r3d_coords_raw(cs, (float[]) { - info->dstOffsets[0].x, info->dstOffsets[0].y, - info->srcOffsets[0].x, info->srcOffsets[0].y, - info->dstOffsets[1].x, info->dstOffsets[1].y, - info->srcOffsets[1].x, info->srcOffsets[1].y - }); - } else { - tu_cs_emit_regs(cs, - A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x), - .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)), - A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1, - .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1)); - tu_cs_emit_regs(cs, - A6XX_GRAS_2D_SRC_TL_X(MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)), - A6XX_GRAS_2D_SRC_BR_X(MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1), - A6XX_GRAS_2D_SRC_TL_Y(MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)), - A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1)); - } - - struct fdl6_view dst, src; - tu_image_view_blit(&dst, dst_image, &info->dstSubresource, - MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z)); - - if (z_scale) { - tu_image_view_copy_blit(&src, src_image, src_format, - &info->srcSubresource, 0, true); - ops->src(cmd, cs, &src, 0, filter, dst_format); - } else { - tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z); - } - - for (uint32_t i = 0; i < layers; i++) { - if (z_scale) { - float t = ((float) i + 0.5f) / (float) layers; - r3d_coord_z(cs, t * (src1_z - src0_z) + src0_z); - } else { - ops->src(cmd, cs, &src, i, filter, dst_format); - } - ops->dst(cs, &dst, i, src_format); - ops->run(cmd, cs); - } - - ops->teardown(cmd, cs); - - trace_end_blit(&cmd->trace, cs, - ops == &r3d_ops, - src_image->vk.format, - dst_image->vk.format, - layers); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, - const VkBlitImageInfo2* pBlitImageInfo) - -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, src_image, pBlitImageInfo->srcImage); - TU_FROM_HANDLE(tu_image, dst_image, pBlitImageInfo->dstImage); - - for (uint32_t i = 0; i < pBlitImageInfo->regionCount; ++i) { - /* can't blit both depth and stencil at once with D32_S8 - * TODO: more advanced 3D blit path to support it instead? 
- */ - if (src_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT || - dst_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - VkImageBlit2 region = pBlitImageInfo->pRegions[i]; - u_foreach_bit(b, region.dstSubresource.aspectMask) { - region.srcSubresource.aspectMask = BIT(b); - region.dstSubresource.aspectMask = BIT(b); - tu6_blit_image(cmd, src_image, dst_image, &region, pBlitImageInfo->filter); - } - continue; - } - tu6_blit_image(cmd, src_image, dst_image, pBlitImageInfo->pRegions + i, - pBlitImageInfo->filter); - } - - if (dst_image->lrz_height) { - tu_disable_lrz(cmd, &cmd->cs, dst_image); - } -} - -static void -copy_compressed(VkFormat format, - VkOffset3D *offset, - VkExtent3D *extent, - uint32_t *width, - uint32_t *height) -{ - if (!vk_format_is_compressed(format)) - return; - - uint32_t block_width = vk_format_get_blockwidth(format); - uint32_t block_height = vk_format_get_blockheight(format); - - offset->x /= block_width; - offset->y /= block_height; - - if (extent) { - extent->width = DIV_ROUND_UP(extent->width, block_width); - extent->height = DIV_ROUND_UP(extent->height, block_height); - } - if (width) - *width = DIV_ROUND_UP(*width, block_width); - if (height) - *height = DIV_ROUND_UP(*height, block_height); -} - -static void -tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, - struct tu_buffer *src_buffer, - struct tu_image *dst_image, - const VkBufferImageCopy2 *info) -{ - struct tu_cs *cs = &cmd->cs; - uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); - enum pipe_format src_format = - copy_format(dst_image->vk.format, info->imageSubresource.aspectMask); - enum pipe_format dst_format = - copy_format(dst_image->vk.format, info->imageSubresource.aspectMask); - const struct blit_ops *ops = &r2d_ops; - - /* special case for buffer to stencil */ - if (dst_image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && - info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { - src_format = PIPE_FORMAT_S8_UINT; - } - - /* note: could use "R8_UNORM" when no UBWC */ - if (src_format == PIPE_FORMAT_Y8_UNORM) - ops = &r3d_ops; - - VkOffset3D offset = info->imageOffset; - VkExtent3D extent = info->imageExtent; - uint32_t src_width = info->bufferRowLength ?: extent.width; - uint32_t src_height = info->bufferImageHeight ?: extent.height; - - copy_compressed(dst_image->vk.format, &offset, &extent, &src_width, &src_height); - - uint32_t pitch = src_width * util_format_get_blocksize(src_format); - uint32_t layer_size = src_height * pitch; - - ops->setup(cmd, cs, src_format, dst_format, - info->imageSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc, - dst_image->layout[0].nr_samples); - - struct fdl6_view dst; - tu_image_view_copy(&dst, dst_image, dst_format, &info->imageSubresource, offset.z); - - for (uint32_t i = 0; i < layers; i++) { - ops->dst(cs, &dst, i, src_format); - - uint64_t src_va = src_buffer->iova + info->bufferOffset + layer_size * i; - if ((src_va & 63) || (pitch & 63)) { - for (uint32_t y = 0; y < extent.height; y++) { - uint32_t x = (src_va & 63) / util_format_get_blocksize(src_format); - ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch, - x + extent.width, 1, dst_format); - ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x}, - &(VkExtent2D) {extent.width, 1}); - ops->run(cmd, cs); - src_va += pitch; - } - } else { - ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height, dst_format); - coords(ops, cs, &offset, &(VkOffset3D){}, &extent); - ops->run(cmd, cs); - } - } - - 
ops->teardown(cmd, cs); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, dst_image, pCopyBufferToImageInfo->dstImage); - TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); - - for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; ++i) - tu_copy_buffer_to_image(cmd, src_buffer, dst_image, - pCopyBufferToImageInfo->pRegions + i); - - if (dst_image->lrz_height) { - tu_disable_lrz(cmd, &cmd->cs, dst_image); - } -} - -static void -tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, - struct tu_image *src_image, - struct tu_buffer *dst_buffer, - const VkBufferImageCopy2 *info) -{ - struct tu_cs *cs = &cmd->cs; - uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); - enum pipe_format dst_format = - copy_format(src_image->vk.format, info->imageSubresource.aspectMask); - enum pipe_format src_format = - copy_format(src_image->vk.format, info->imageSubresource.aspectMask); - const struct blit_ops *ops = &r2d_ops; - - if (src_image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && - info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { - dst_format = PIPE_FORMAT_S8_UINT; - } - - /* note: could use "R8_UNORM" when no UBWC */ - if (dst_format == PIPE_FORMAT_Y8_UNORM) - ops = &r3d_ops; - - VkOffset3D offset = info->imageOffset; - VkExtent3D extent = info->imageExtent; - uint32_t dst_width = info->bufferRowLength ?: extent.width; - uint32_t dst_height = info->bufferImageHeight ?: extent.height; - - copy_compressed(src_image->vk.format, &offset, &extent, &dst_width, &dst_height); - - uint32_t pitch = dst_width * util_format_get_blocksize(dst_format); - uint32_t layer_size = pitch * dst_height; - - ops->setup(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, - VK_SAMPLE_COUNT_1_BIT); - - struct fdl6_view src; - tu_image_view_copy(&src, src_image, src_format, &info->imageSubresource, offset.z); - - for (uint32_t i = 0; i < layers; i++) { - ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, dst_format); - - uint64_t dst_va = dst_buffer->iova + info->bufferOffset + layer_size * i; - if ((dst_va & 63) || (pitch & 63)) { - for (uint32_t y = 0; y < extent.height; y++) { - uint32_t x = (dst_va & 63) / util_format_get_blocksize(dst_format); - ops->dst_buffer(cs, dst_format, dst_va & ~63, 0, src_format); - ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y}, - &(VkExtent2D) {extent.width, 1}); - ops->run(cmd, cs); - dst_va += pitch; - } - } else { - ops->dst_buffer(cs, dst_format, dst_va, pitch, src_format); - coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent); - ops->run(cmd, cs); - } - } - - ops->teardown(cmd, cs); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, src_image, pCopyImageToBufferInfo->srcImage); - TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer); - - for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; ++i) - tu_copy_image_to_buffer(cmd, src_image, dst_buffer, - pCopyImageToBufferInfo->pRegions + i); -} - -/* Tiled formats don't support swapping, which means that we can't support - * formats that require a non-WZYX swap like B8G8R8A8 natively. 
Also, some - * formats like B5G5R5A1 have a separate linear-only format when sampling. - * Currently we fake support for tiled swapped formats and use the unswapped - * format instead, but this means that reinterpreting copies to and from - * swapped formats can't be performed correctly unless we can swizzle the - * components by reinterpreting the other image as the "correct" swapped - * format, i.e. only when the other image is linear. - */ - -static bool -is_swapped_format(enum pipe_format format) -{ - struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR); - struct tu_native_format tiled = tu6_format_texture(format, TILE6_3); - return linear.fmt != tiled.fmt || linear.swap != tiled.swap; -} - -/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and - * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice - * versa). This should mirror the logic in fdl6_layout. - */ -static bool -image_is_r8g8(struct tu_image *image) -{ - return image->layout[0].cpp == 2 && - vk_format_get_nr_components(image->vk.format) == 2; -} - -static void -tu_copy_image_to_image(struct tu_cmd_buffer *cmd, - struct tu_image *src_image, - struct tu_image *dst_image, - const VkImageCopy2 *info) -{ - const struct blit_ops *ops = &r2d_ops; - struct tu_cs *cs = &cmd->cs; - - if (dst_image->layout[0].nr_samples > 1) - ops = &r3d_ops; - - enum pipe_format format = PIPE_FORMAT_NONE; - VkOffset3D src_offset = info->srcOffset; - VkOffset3D dst_offset = info->dstOffset; - VkExtent3D extent = info->extent; - uint32_t layers_to_copy = MAX2(info->extent.depth, info->srcSubresource.layerCount); - - /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between - * Images": - * - * When copying between compressed and uncompressed formats the extent - * members represent the texel dimensions of the source image and not - * the destination. When copying from a compressed image to an - * uncompressed image the image texel dimensions written to the - * uncompressed image will be source extent divided by the compressed - * texel block dimensions. When copying from an uncompressed image to a - * compressed image the image texel dimensions written to the compressed - * image will be the source extent multiplied by the compressed texel - * block dimensions. - * - * This means we only have to adjust the extent if the source image is - * compressed. - */ - copy_compressed(src_image->vk.format, &src_offset, &extent, NULL, NULL); - copy_compressed(dst_image->vk.format, &dst_offset, NULL, NULL, NULL); - - enum pipe_format dst_format = copy_format(dst_image->vk.format, info->dstSubresource.aspectMask); - enum pipe_format src_format = copy_format(src_image->vk.format, info->srcSubresource.aspectMask); - - /* note: could use "R8_UNORM" when no UBWC */ - if (dst_format == PIPE_FORMAT_Y8_UNORM || - src_format == PIPE_FORMAT_Y8_UNORM) - ops = &r3d_ops; - - bool use_staging_blit = false; - - if (src_format == dst_format) { - /* Images that share a format can always be copied directly because it's - * the same as a blit. - */ - format = src_format; - } else if (!src_image->layout[0].tile_mode) { - /* If an image is linear, we can always safely reinterpret it with the - * other image's format and then do a regular blit. 
- */ - format = dst_format; - } else if (!dst_image->layout[0].tile_mode) { - format = src_format; - } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) { - /* We can't currently copy r8g8 images to/from other cpp=2 images, - * due to the different tile layout. - */ - use_staging_blit = true; - } else if (is_swapped_format(src_format) || - is_swapped_format(dst_format)) { - /* If either format has a non-identity swap, then we can't copy - * to/from it. - */ - use_staging_blit = true; - } else if (!src_image->layout[0].ubwc) { - format = dst_format; - } else if (!dst_image->layout[0].ubwc) { - format = src_format; - } else { - /* Both formats use UBWC and so neither can be reinterpreted. - * TODO: We could do an in-place decompression of the dst instead. - */ - perf_debug(cmd->device, "TODO: Do in-place UBWC decompression for UBWC->UBWC blits"); - use_staging_blit = true; - } - - struct fdl6_view dst, src; - - if (use_staging_blit) { - tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z); - tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z); - - struct fdl_layout staging_layout = { 0 }; - VkOffset3D staging_offset = { 0 }; - - staging_layout.tile_mode = TILE6_LINEAR; - staging_layout.ubwc = false; - - fdl6_layout(&staging_layout, - src_format, - src_image->layout[0].nr_samples, - extent.width, - extent.height, - extent.depth, - 1, - info->srcSubresource.layerCount, - extent.depth > 1, - NULL); - - struct tu_bo *staging_bo; - VkResult result = tu_get_scratch_bo(cmd->device, - staging_layout.size, - &staging_bo); - if (result != VK_SUCCESS) { - vk_command_buffer_set_error(&cmd->vk, result); - return; - } - - struct fdl6_view staging; - const struct fdl_layout *staging_layout_ptr = &staging_layout; - fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) { - .iova = staging_bo->iova, - .base_array_layer = 0, - .layer_count = 1, - .base_miplevel = 0, - .level_count = info->srcSubresource.layerCount, - .format = tu_format_for_aspect(src_format, VK_IMAGE_ASPECT_COLOR_BIT), - .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, - .type = FDL_VIEW_TYPE_2D, - }, false); - - ops->setup(cmd, cs, src_format, src_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, - dst_image->layout[0].nr_samples); - coords(ops, cs, &staging_offset, &src_offset, &extent); - - for (uint32_t i = 0; i < layers_to_copy; i++) { - ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, src_format); - ops->dst(cs, &staging, i, src_format); - ops->run(cmd, cs); - } - - /* When executed by the user there has to be a pipeline barrier here, - * but since we're doing it manually we'll have to flush ourselves. 
- */ - tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); - tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); - tu_cs_emit_wfi(cs); - - fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) { - .iova = staging_bo->iova, - .base_array_layer = 0, - .layer_count = 1, - .base_miplevel = 0, - .level_count = info->srcSubresource.layerCount, - .format = tu_format_for_aspect(dst_format, VK_IMAGE_ASPECT_COLOR_BIT), - .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, - .type = FDL_VIEW_TYPE_2D, - }, false); - - ops->setup(cmd, cs, dst_format, dst_format, info->dstSubresource.aspectMask, - 0, false, dst_image->layout[0].ubwc, - dst_image->layout[0].nr_samples); - coords(ops, cs, &dst_offset, &staging_offset, &extent); - - for (uint32_t i = 0; i < layers_to_copy; i++) { - ops->src(cmd, cs, &staging, i, VK_FILTER_NEAREST, dst_format); - ops->dst(cs, &dst, i, dst_format); - ops->run(cmd, cs); - } - } else { - tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z); - tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z); - - ops->setup(cmd, cs, format, format, info->dstSubresource.aspectMask, - 0, false, dst_image->layout[0].ubwc, - dst_image->layout[0].nr_samples); - coords(ops, cs, &dst_offset, &src_offset, &extent); - - for (uint32_t i = 0; i < layers_to_copy; i++) { - ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, format); - ops->dst(cs, &dst, i, format); - ops->run(cmd, cs); - } - } - - ops->teardown(cmd, cs); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, - const VkCopyImageInfo2* pCopyImageInfo) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, src_image, pCopyImageInfo->srcImage); - TU_FROM_HANDLE(tu_image, dst_image, pCopyImageInfo->dstImage); - - for (uint32_t i = 0; i < pCopyImageInfo->regionCount; ++i) { - if (src_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - VkImageCopy2 info = pCopyImageInfo->pRegions[i]; - u_foreach_bit(b, info.dstSubresource.aspectMask) { - info.srcSubresource.aspectMask = BIT(b); - info.dstSubresource.aspectMask = BIT(b); - tu_copy_image_to_image(cmd, src_image, dst_image, &info); - } - continue; - } - - tu_copy_image_to_image(cmd, src_image, dst_image, - pCopyImageInfo->pRegions + i); - } - - if (dst_image->lrz_height) { - tu_disable_lrz(cmd, &cmd->cs, dst_image); - } -} - -static void -copy_buffer(struct tu_cmd_buffer *cmd, - uint64_t dst_va, - uint64_t src_va, - uint64_t size, - uint32_t block_size) -{ - const struct blit_ops *ops = &r2d_ops; - struct tu_cs *cs = &cmd->cs; - enum pipe_format format = block_size == 4 ? 
PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R8_UNORM; - uint64_t blocks = size / block_size; - - ops->setup(cmd, cs, format, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, - VK_SAMPLE_COUNT_1_BIT); - - while (blocks) { - uint32_t src_x = (src_va & 63) / block_size; - uint32_t dst_x = (dst_va & 63) / block_size; - uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x); - - ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1, format); - ops->dst_buffer( cs, format, dst_va & ~63, 0, format); - ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1}); - ops->run(cmd, cs); - - src_va += width * block_size; - dst_va += width * block_size; - blocks -= width; - } - - ops->teardown(cmd, cs); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2 *pCopyBufferInfo) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferInfo->srcBuffer); - TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyBufferInfo->dstBuffer); - - for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) { - const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i]; - copy_buffer(cmd, - dst_buffer->iova + region->dstOffset, - src_buffer->iova + region->srcOffset, - region->size, 1); - } -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize dataSize, - const void *pData) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); - - struct tu_cs_memory tmp; - VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64 / 4, &tmp); - if (result != VK_SUCCESS) { - vk_command_buffer_set_error(&cmd->vk, result); - return; - } - - memcpy(tmp.map, pData, dataSize); - copy_buffer(cmd, buffer->iova + dstOffset, tmp.iova, dataSize, 4); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdFillBuffer(VkCommandBuffer commandBuffer, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize fillSize, - uint32_t data) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); - const struct blit_ops *ops = &r2d_ops; - struct tu_cs *cs = &cmd->cs; - - fillSize = vk_buffer_range(&buffer->vk, dstOffset, fillSize); - - uint64_t dst_va = buffer->iova + dstOffset; - uint32_t blocks = fillSize / 4; - - ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT, - VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false, - VK_SAMPLE_COUNT_1_BIT); - ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}}); - - while (blocks) { - uint32_t dst_x = (dst_va & 63) / 4; - uint32_t width = MIN2(blocks, 0x4000 - dst_x); - - ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT, dst_va & ~63, 0, PIPE_FORMAT_R32_UINT); - ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1}); - ops->run(cmd, cs); - - dst_va += width * 4; - blocks -= width; - } - - ops->teardown(cmd, cs); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdResolveImage2KHR(VkCommandBuffer commandBuffer, - const VkResolveImageInfo2* pResolveImageInfo) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, src_image, pResolveImageInfo->srcImage); - TU_FROM_HANDLE(tu_image, dst_image, pResolveImageInfo->dstImage); - const struct blit_ops *ops = &r2d_ops; - struct tu_cs *cs = &cmd->cs; - - enum pipe_format src_format = - tu_vk_format_to_pipe_format(src_image->vk.format); - enum pipe_format dst_format = - 
tu_vk_format_to_pipe_format(dst_image->vk.format); - ops->setup(cmd, cs, src_format, dst_format, - VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst_image->layout[0].ubwc, - VK_SAMPLE_COUNT_1_BIT); - - for (uint32_t i = 0; i < pResolveImageInfo->regionCount; ++i) { - const VkImageResolve2 *info = &pResolveImageInfo->pRegions[i]; - uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount); - - assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount); - /* TODO: aspect masks possible ? */ - - coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent); - - struct fdl6_view dst, src; - tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z); - tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z); - - for (uint32_t i = 0; i < layers; i++) { - ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, dst_format); - ops->dst(cs, &dst, i, src_format); - ops->run(cmd, cs); - } - } - - ops->teardown(cmd, cs); -} - -#define for_each_layer(layer, layer_mask, layers) \ - for (uint32_t layer = 0; \ - layer < ((layer_mask) ? (util_logbase2(layer_mask) + 1) : layers); \ - layer++) \ - if (!layer_mask || (layer_mask & BIT(layer))) - -static void -resolve_sysmem(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - VkFormat vk_src_format, - VkFormat vk_dst_format, - const struct tu_image_view *src, - const struct tu_image_view *dst, - uint32_t layer_mask, - uint32_t layers, - const VkRect2D *rect, - bool src_separate_ds, - bool dst_separate_ds) -{ - const struct blit_ops *ops = &r2d_ops; - - trace_start_sysmem_resolve(&cmd->trace, cs); - - enum pipe_format src_format = tu_vk_format_to_pipe_format(vk_src_format); - enum pipe_format dst_format = tu_vk_format_to_pipe_format(vk_dst_format); - - ops->setup(cmd, cs, src_format, dst_format, - VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst->view.ubwc_enabled, - VK_SAMPLE_COUNT_1_BIT); - ops->coords(cs, &rect->offset, &rect->offset, &rect->extent); - - for_each_layer(i, layer_mask, layers) { - if (src_separate_ds) { - if (vk_src_format == VK_FORMAT_D32_SFLOAT || vk_dst_format == VK_FORMAT_D32_SFLOAT) { - r2d_src_depth(cmd, cs, src, i, VK_FILTER_NEAREST); - } else { - r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST); - } - } else { - ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST, dst_format); - } - - if (dst_separate_ds) { - if (vk_dst_format == VK_FORMAT_D32_SFLOAT) { - ops->dst_depth(cs, dst, i); - } else { - ops->dst_stencil(cs, dst, i); - } - } else { - ops->dst(cs, &dst->view, i, src_format); - } - - ops->run(cmd, cs); - } - - ops->teardown(cmd, cs); - - trace_end_sysmem_resolve(&cmd->trace, cs, vk_dst_format); -} - -void -tu_resolve_sysmem(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *src, - const struct tu_image_view *dst, - uint32_t layer_mask, - uint32_t layers, - const VkRect2D *rect) -{ - assert(src->image->vk.format == dst->image->vk.format || - (vk_format_is_depth_or_stencil(src->image->vk.format) && - vk_format_is_depth_or_stencil(dst->image->vk.format))); - - bool src_separate_ds = src->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT; - bool dst_separate_ds = dst->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT; - - if (dst_separate_ds) { - resolve_sysmem(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT, - src, dst, layer_mask, layers, rect, - src_separate_ds, dst_separate_ds); - resolve_sysmem(cmd, cs, VK_FORMAT_S8_UINT, VK_FORMAT_S8_UINT, - src, dst, layer_mask, layers, rect, - src_separate_ds, dst_separate_ds); - } else { - resolve_sysmem(cmd, cs, 
src->image->vk.format, dst->image->vk.format, - src, dst, layer_mask, layers, rect, - src_separate_ds, dst_separate_ds); - } -} - -static void -clear_image(struct tu_cmd_buffer *cmd, - struct tu_image *image, - const VkClearValue *clear_value, - const VkImageSubresourceRange *range, - VkImageAspectFlags aspect_mask) -{ - uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); - uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range); - struct tu_cs *cs = &cmd->cs; - enum pipe_format format; - if (image->vk.format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { - format = PIPE_FORMAT_R32_UINT; - } else { - format = tu6_plane_format(image->vk.format, - tu6_plane_index(image->vk.format, - aspect_mask)); - } - - if (image->layout[0].depth0 > 1) { - assert(layer_count == 1); - assert(range->baseArrayLayer == 0); - } - - const struct blit_ops *ops = image->layout[0].nr_samples > 1 ? &r3d_ops : &r2d_ops; - - ops->setup(cmd, cs, format, format, aspect_mask, 0, true, image->layout[0].ubwc, - image->layout[0].nr_samples); - if (image->vk.format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) - ops->clear_value(cs, PIPE_FORMAT_R9G9B9E5_FLOAT, clear_value); - else - ops->clear_value(cs, format, clear_value); - - for (unsigned j = 0; j < level_count; j++) { - if (image->layout[0].depth0 > 1) - layer_count = u_minify(image->layout[0].depth0, range->baseMipLevel + j); - - ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) { - u_minify(image->layout[0].width0, range->baseMipLevel + j), - u_minify(image->layout[0].height0, range->baseMipLevel + j) - }); - - struct fdl6_view dst; - tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) { - .aspectMask = aspect_mask, - .mipLevel = range->baseMipLevel + j, - .baseArrayLayer = range->baseArrayLayer, - .layerCount = 1, - }, 0, false); - - for (uint32_t i = 0; i < layer_count; i++) { - ops->dst(cs, &dst, i, format); - ops->run(cmd, cs); - } - } - - ops->teardown(cmd, cs); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdClearColorImage(VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearColorValue *pColor, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, image, image_h); - - for (unsigned i = 0; i < rangeCount; i++) - clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT); -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, - VkImage image_h, - VkImageLayout imageLayout, - const VkClearDepthStencilValue *pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_image, image, image_h); - - for (unsigned i = 0; i < rangeCount; i++) { - const VkImageSubresourceRange *range = &pRanges[i]; - - if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - /* can't clear both depth and stencil at once, split up the aspect mask */ - u_foreach_bit(b, range->aspectMask) - clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b)); - continue; - } - - clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask); - } - - tu_lrz_clear_depth_image(cmd, image, pDepthStencil, rangeCount, pRanges); -} - -static void -tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t rect_count, - const VkClearRect *rects) -{ - /* 
the shader path here is special, it avoids changing MRT/etc state */ - const struct tu_subpass *subpass = cmd->state.subpass; - const uint32_t mrt_count = subpass->color_count; - struct tu_cs *cs = &cmd->draw_cs; - uint32_t clear_value[MAX_RTS][4]; - float z_clear_val = 0.0f; - uint8_t s_clear_val = 0; - uint32_t clear_rts = 0, clear_components = 0; - bool z_clear = false; - bool s_clear = false; - - trace_start_sysmem_clear_all(&cmd->trace, cs); - - for (uint32_t i = 0; i < attachment_count; i++) { - uint32_t a; - if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - uint32_t c = attachments[i].colorAttachment; - a = subpass->color_attachments[c].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - clear_rts |= 1 << c; - clear_components |= 0xf << (c * 4); - memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t)); - } else { - a = subpass->depth_stencil_attachment.attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { - z_clear = true; - z_clear_val = attachments[i].clearValue.depthStencil.depth; - } - - if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { - s_clear = true; - s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff; - } - } - } - - /* We may not know the multisample count if there are no attachments, so - * just bail early to avoid corner cases later. - */ - if (clear_rts == 0 && !z_clear && !s_clear) - return; - - /* disable all draw states so they don't interfere - * TODO: use and re-use draw states - * we have to disable draw states individually to preserve - * input attachment states, because a secondary command buffer - * won't be able to restore them - */ - tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2)); - for (uint32_t i = 0; i < TU_DRAW_STATE_COUNT; i++) { - if (i == TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM || - i == TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM) - continue; - tu_cs_emit(cs, CP_SET_DRAW_STATE__0_GROUP_ID(i) | - CP_SET_DRAW_STATE__0_DISABLE); - tu_cs_emit_qw(cs, 0); - } - cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE; - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); - tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | - A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) | - 0xfc000000); - tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count)); - - r3d_common(cmd, cs, false, clear_rts, false, cmd->state.subpass->samples); - - /* Disable sample counting in order to not affect occlusion query. 
*/ - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = true)); - - if (cmd->state.prim_generated_query_running_before_rp) { - tu6_emit_event_write(cmd, cs, STOP_PRIMITIVE_CTRS); - } - - tu_cs_emit_regs(cs, - A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components)); - tu_cs_emit_regs(cs, - A6XX_RB_RENDER_COMPONENTS(.dword = clear_components)); - - tu_cs_emit_regs(cs, - A6XX_RB_FS_OUTPUT_CNTL0(), - A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count)); - - tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL()); - tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff)); - for (uint32_t i = 0; i < mrt_count; i++) { - tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i, - .component_enable = COND(clear_rts & (1 << i), 0xf))); - } - - tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); - tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); - - tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL()); - tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL( - .z_test_enable = z_clear, - .z_write_enable = z_clear, - .zfunc = FUNC_ALWAYS)); - tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); - tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL( - .stencil_enable = s_clear, - .func = FUNC_ALWAYS, - .zpass = STENCIL_REPLACE)); - tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff)); - tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff)); - tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val)); - - tu_cs_emit_regs(cs, A6XX_GRAS_SC_CNTL(.ccusinglecachelinesize = 2)); - - unsigned num_rts = util_bitcount(clear_rts); - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(num_rts)); - tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); - tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); - u_foreach_bit(b, clear_rts) - tu_cs_emit_array(cs, clear_value[b], 4); - - for (uint32_t i = 0; i < rect_count; i++) { - /* This should be true because of this valid usage for - * vkCmdClearAttachments: - * - * "If the render pass instance this is recorded in uses multiview, - * then baseArrayLayer must be zero and layerCount must be one" - */ - assert(!subpass->multiview_mask || rects[i].baseArrayLayer == 0); - - /* a630 doesn't support multiview masks, which means that we can't use - * the normal multiview path without potentially recompiling a shader - * on-demand or using a more complicated variant that takes the mask as - * a const. Just use the layered path instead, since it shouldn't be - * much worse. - */ - for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount) { - r3d_coords_raw(cs, (float[]) { - rects[i].rect.offset.x, rects[i].rect.offset.y, - z_clear_val, uif(rects[i].baseArrayLayer + layer), - rects[i].rect.offset.x + rects[i].rect.extent.width, - rects[i].rect.offset.y + rects[i].rect.extent.height, - z_clear_val, 1.0f, - }); - r3d_run_vis(cmd, cs); - } - } - - /* Re-enable sample counting. 
*/ - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = false)); - - if (cmd->state.prim_generated_query_running_before_rp) { - tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS); - } - - trace_end_sysmem_clear_all(&cmd->trace, - cs, mrt_count, rect_count); -} - -static void -pack_gmem_clear_value(const VkClearValue *val, enum pipe_format format, uint32_t clear_value[4]) -{ - switch (format) { - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) | - val->depthStencil.stencil << 24; - return; - case PIPE_FORMAT_Z16_UNORM: - clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16); - return; - case PIPE_FORMAT_Z32_FLOAT: - clear_value[0] = fui(val->depthStencil.depth); - return; - case PIPE_FORMAT_S8_UINT: - clear_value[0] = val->depthStencil.stencil; - return; - default: - break; - } - - float tmp[4]; - memcpy(tmp, val->color.float32, 4 * sizeof(float)); - if (util_format_is_srgb(format)) { - for (int i = 0; i < 3; i++) - tmp[i] = util_format_linear_to_srgb_float(tmp[i]); - } - -#define PACK_F(type) util_format_##type##_pack_rgba_float \ - ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1) - switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { - case 4: - PACK_F(r4g4b4a4_unorm); - break; - case 5: - if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6) - PACK_F(r5g6b5_unorm); - else - PACK_F(r5g5b5a1_unorm); - break; - case 8: - if (util_format_is_snorm(format)) - PACK_F(r8g8b8a8_snorm); - else if (util_format_is_unorm(format)) - PACK_F(r8g8b8a8_unorm); - else - pack_int8(clear_value, val->color.uint32); - break; - case 10: - if (util_format_is_pure_integer(format)) - pack_int10_2(clear_value, val->color.uint32); - else - PACK_F(r10g10b10a2_unorm); - break; - case 11: - clear_value[0] = float3_to_r11g11b10f(val->color.float32); - break; - case 16: - if (util_format_is_snorm(format)) - PACK_F(r16g16b16a16_snorm); - else if (util_format_is_unorm(format)) - PACK_F(r16g16b16a16_unorm); - else if (util_format_is_float(format)) - PACK_F(r16g16b16a16_float); - else - pack_int16(clear_value, val->color.uint32); - break; - case 32: - memcpy(clear_value, val->color.float32, 4 * sizeof(float)); - break; - default: - unreachable("unexpected channel size"); - } -#undef PACK_F -} - -static void -clear_gmem_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum pipe_format format, - uint8_t clear_mask, - uint32_t gmem_offset, - const VkClearValue *value) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format))); - - tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - tu_cs_emit(cs, gmem_offset); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); - tu_cs_emit(cs, 0); - - uint32_t clear_vals[4] = {}; - pack_gmem_clear_value(value, format, clear_vals); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); - tu_cs_emit_array(cs, clear_vals, 4); - - tu6_emit_event_write(cmd, cs, BLIT); -} - -static void -tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t attachment, - VkImageAspectFlags mask, - const VkClearValue *value) -{ - const struct tu_render_pass_attachment *att = - &cmd->state.pass->attachments[attachment]; - - trace_start_gmem_clear(&cmd->trace, cs); - - tu_cs_emit_regs(cs, - 
A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(att->samples))); - - enum pipe_format format = tu_vk_format_to_pipe_format(att->format); - if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) - clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, tu_attachment_gmem_offset(cmd, att), value); - if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) - clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, tu_attachment_gmem_offset_stencil(cmd, att), value); - } else { - clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask), - tu_attachment_gmem_offset(cmd, att), value); - } - - trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples); -} - -static void -tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, - uint32_t attachment_count, - const VkClearAttachment *attachments, - uint32_t rect_count, - const VkClearRect *rects) -{ - const struct tu_subpass *subpass = cmd->state.subpass; - struct tu_cs *cs = &cmd->draw_cs; - - if (rect_count > 1) - perf_debug(cmd->device, "TODO: Swap tu_clear_gmem_attachments() loop for smaller command stream"); - - for (unsigned i = 0; i < rect_count; i++) { - unsigned x1 = rects[i].rect.offset.x; - unsigned y1 = rects[i].rect.offset.y; - unsigned x2 = x1 + rects[i].rect.extent.width - 1; - unsigned y2 = y1 + rects[i].rect.extent.height - 1; - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); - tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); - tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); - - for (unsigned j = 0; j < attachment_count; j++) { - uint32_t a; - if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) - a = subpass->color_attachments[attachments[j].colorAttachment].attachment; - else - a = subpass->depth_stencil_attachment.attachment; - - if (a == VK_ATTACHMENT_UNUSED) - continue; - - tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask, - &attachments[j].clearValue); - } - } -} - -VKAPI_ATTR void VKAPI_CALL -tu_CmdClearAttachments(VkCommandBuffer commandBuffer, - uint32_t attachmentCount, - const VkClearAttachment *pAttachments, - uint32_t rectCount, - const VkClearRect *pRects) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs *cs = &cmd->draw_cs; - - /* sysmem path behaves like a draw, note we don't have a way of using different - * flushes for sysmem/gmem, so this needs to be outside of the cond_exec - */ - tu_emit_cache_flush_renderpass(cmd, cs); - - for (uint32_t j = 0; j < attachmentCount; j++) { - if ((pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) == 0) - continue; - - tu_lrz_disable_during_renderpass(cmd); - } - - /* vkCmdClearAttachments is supposed to respect the predicate if active. The - * easiest way to do this is to always use the 3d path, which always works - * even with GMEM because it's just a simple draw using the existing - * attachment state. - * - * Similarly, we also use the 3D path when in a secondary command buffer that - * doesn't know the GMEM layout that will be chosen by the primary. - */ - if (cmd->state.predication_active || cmd->state.gmem_layout == TU_GMEM_LAYOUT_COUNT) { - tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); - return; - } - - /* If we could skip tile load/stores based on any draws intersecting them at - * binning time, then emit the clear as a 3D draw so that it contributes to - * that visibility. 
- */ - const struct tu_subpass *subpass = cmd->state.subpass; - for (uint32_t i = 0; i < attachmentCount; i++) { - uint32_t a; - if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { - uint32_t c = pAttachments[i].colorAttachment; - a = subpass->color_attachments[c].attachment; - } else { - a = subpass->depth_stencil_attachment.attachment; - } - if (a != VK_ATTACHMENT_UNUSED) { - const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a]; - if (att->cond_load_allowed || att->cond_store_allowed) { - tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); - return; - } - } - } - - /* Otherwise, emit 2D blits for gmem rendering. */ - tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM); - tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); - tu_cond_exec_end(cs); - - tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM); - tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); - tu_cond_exec_end(cs); -} - -static void -clear_sysmem_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - VkFormat vk_format, - VkImageAspectFlags clear_mask, - const VkClearValue *value, - uint32_t a, - bool separate_ds) -{ - enum pipe_format format = tu_vk_format_to_pipe_format(vk_format); - const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_image_view *iview = cmd->state.attachments[a]; - const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views; - const struct blit_ops *ops = &r2d_ops; - if (cmd->state.pass->attachments[a].samples > 1) - ops = &r3d_ops; - - trace_start_sysmem_clear(&cmd->trace, cs); - - ops->setup(cmd, cs, format, format, clear_mask, 0, true, iview->view.ubwc_enabled, - cmd->state.pass->attachments[a].samples); - ops->coords(cs, &cmd->state.render_area.offset, NULL, - &cmd->state.render_area.extent); - ops->clear_value(cs, format, value); - - for_each_layer(i, clear_views, fb->layers) { - if (separate_ds) { - if (vk_format == VK_FORMAT_D32_SFLOAT) { - ops->dst_depth(cs, iview, i); - } else { - ops->dst_stencil(cs, iview, i); - } - } else { - ops->dst(cs, &iview->view, i, format); - } - ops->run(cmd, cs); - } - - ops->teardown(cmd, cs); - - trace_end_sysmem_clear(&cmd->trace, cs, - vk_format, ops == &r3d_ops, - cmd->state.pass->attachments[a].samples); -} - -void -tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t a, - const VkClearValue *value) -{ - const struct tu_render_pass_attachment *attachment = - &cmd->state.pass->attachments[a]; - - if (!attachment->clear_mask) - return; - - if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) { - clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, - value, a, true); - } - if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) { - clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT, - value, a, true); - } - } else { - clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask, - value, a, false); - } - - /* The spec doesn't explicitly say, but presumably the initial renderpass - * clear is considered part of the renderpass, and therefore barriers - * aren't required inside the subpass/renderpass. Therefore we need to - * flush CCU color into CCU depth here, just like with - * vkCmdClearAttachments(). 
Note that because this only happens at the - * beginning of a renderpass, and renderpass writes are considered - * "incoherent", we shouldn't have to worry about syncing depth into color - * beforehand as depth should already be flushed. - */ - if (vk_format_is_depth_or_stencil(attachment->format)) { - tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); - tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS); - tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH); - } else { - tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); - tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR); - } - - if (cmd->device->physical_device->info->a6xx.has_ccu_flush_bug) - tu_cs_emit_wfi(cs); -} - -void -tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t a, - const VkClearValue *value) -{ - const struct tu_render_pass_attachment *attachment = - &cmd->state.pass->attachments[a]; - - if (!attachment->clear_mask) - return; - - tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask, value); -} - -static void -tu_emit_blit(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - const struct tu_render_pass_attachment *attachment, - bool resolve, - bool separate_stencil) -{ - tu_cs_emit_regs(cs, - A6XX_RB_BLIT_GMEM_MSAA_CNTL(tu_msaa_samples(attachment->samples))); - - tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO( - .unk0 = !resolve, - .gmem = !resolve, - .sample_0 = vk_format_is_int(attachment->format) || - vk_format_is_depth_or_stencil(attachment->format))); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4); - if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - if (!separate_stencil) { - tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO)); - tu_cs_emit_qw(cs, iview->depth_base_addr); - tu_cs_emit(cs, iview->depth_PITCH); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3); - tu_cs_image_flag_ref(cs, &iview->view, 0); - } else { - tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS); - tu_cs_emit_qw(cs, iview->stencil_base_addr); - tu_cs_emit(cs, iview->stencil_PITCH); - } - } else { - tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO); - tu_cs_image_ref_2d(cs, &iview->view, 0, false); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3); - tu_cs_image_flag_ref(cs, &iview->view, 0); - } - - if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) { - tu_cs_emit_regs(cs, - A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment))); - } else { - tu_cs_emit_regs(cs, - A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment))); - } - - tu6_emit_event_write(cmd, cs, BLIT); -} - -static bool -blit_can_resolve(VkFormat format) -{ - const struct util_format_description *desc = vk_format_description(format); - - /* blit event can only do resolve for simple cases: - * averaging samples as unsigned integers or choosing only one sample - */ - if (vk_format_is_snorm(format) || vk_format_is_srgb(format)) - return false; - - /* can't do formats with larger channel sizes - * note: this includes all float formats - * note2: single channel integer formats seem OK - */ - if (desc->channel[0].size > 10) - return false; - - switch (format) { - /* for unknown reasons blit event can't msaa resolve these formats when tiled - * likely related to these formats having different layout from other cpp=2 formats - */ - case VK_FORMAT_R8G8_UNORM: - case VK_FORMAT_R8G8_UINT: - case VK_FORMAT_R8G8_SINT: - /* TODO: this one should be able to work? 
*/ - case VK_FORMAT_D24_UNORM_S8_UINT: - return false; - default: - break; - } - - return true; -} - -static void -tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, bool load) -{ - tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); - - if (!unlikely(cmd->device->physical_device->instance->debug_flags & - TU_DEBUG_LOG_SKIP_GMEM_OPS)) - return; - - uint64_t result_iova; - if (load) - result_iova = global_iova(cmd, dbg_gmem_taken_loads); - else - result_iova = global_iova(cmd, dbg_gmem_taken_stores); - - tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7); - tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B); - tu_cs_emit_qw(cs, result_iova); - tu_cs_emit_qw(cs, result_iova); - tu_cs_emit_qw(cs, global_iova(cmd, dbg_one)); -} - -static void -tu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, bool load) -{ - tu_cond_exec_end(cs); - - if (!unlikely(cmd->device->physical_device->instance->debug_flags & - TU_DEBUG_LOG_SKIP_GMEM_OPS)) - return; - - uint64_t result_iova; - if (load) - result_iova = global_iova(cmd, dbg_gmem_total_loads); - else - result_iova = global_iova(cmd, dbg_gmem_total_stores); - - tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7); - tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B); - tu_cs_emit_qw(cs, result_iova); - tu_cs_emit_qw(cs, result_iova); - tu_cs_emit_qw(cs, global_iova(cmd, dbg_one)); -} - -void -tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t a, - bool cond_exec_allowed, - bool force_load) -{ - const struct tu_image_view *iview = cmd->state.attachments[a]; - const struct tu_render_pass_attachment *attachment = - &cmd->state.pass->attachments[a]; - - bool load_common = attachment->load || force_load; - bool load_stencil = - attachment->load_stencil || - (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load); - - if (!load_common && !load_stencil) - return; - - trace_start_gmem_load(&cmd->trace, cs); - - /* If attachment will be cleared by vkCmdClearAttachments - it is likely - * that it would be partially cleared, and since it is done by 2d blit - * it doesn't produce geometry, so we have to unconditionally load. - * - * To simplify conditions treat partially cleared separate DS as fully - * cleared and don't emit cond_exec. 
- */ - bool cond_exec = cond_exec_allowed && attachment->cond_load_allowed; - if (cond_exec) - tu_begin_load_store_cond_exec(cmd, cs, true); - - if (load_common) - tu_emit_blit(cmd, cs, iview, attachment, false, false); - - if (load_stencil) - tu_emit_blit(cmd, cs, iview, attachment, false, true); - - if (cond_exec) - tu_end_load_store_cond_exec(cmd, cs, true); - - trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load); -} - -static void -store_cp_blit(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t samples, - bool separate_stencil, - enum pipe_format src_format, - enum pipe_format dst_format, - uint32_t gmem_offset, - uint32_t cpp) -{ - r2d_setup_common(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, - iview->view.ubwc_enabled, true); - - if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - if (!separate_stencil) { - r2d_dst_depth(cs, iview, 0); - } else { - r2d_dst_stencil(cs, iview, 0); - } - } else { - r2d_dst(cs, &iview->view, 0, src_format); - } - - enum a6xx_format fmt = tu6_format_texture(src_format, TILE6_2).fmt; - fixup_src_format(&src_format, dst_format, &fmt); - - tu_cs_emit_regs(cs, - A6XX_SP_PS_2D_SRC_INFO( - .color_format = fmt, - .color_swap = WZYX, - .tile_mode = TILE6_2, - .srgb = util_format_is_srgb(src_format), - .samples = tu_msaa_samples(samples), - .samples_average = !util_format_is_pure_integer(dst_format) && - !util_format_is_depth_or_stencil(dst_format), - .unk20 = 1, - .unk22 = 1), - /* note: src size does not matter when not scaling */ - A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff), - A6XX_SP_PS_2D_SRC(.qword = cmd->device->physical_device->gmem_base + gmem_offset), - A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.tiling->tile0.width * cpp)); - - /* sync GMEM writes with CACHE. */ - tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); - - /* Wait for CACHE_INVALIDATE to land */ - tu_cs_emit_wfi(cs); - - tu_cs_emit_pkt7(cs, CP_BLIT, 1); - tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); - - /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to - * sysmem, and we generally assume that GMEM renderpasses leave their - * results in sysmem, so we need to flush manually here. - */ - tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); -} - -static void -store_3d_blit(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t dst_samples, - bool separate_stencil, - enum pipe_format src_format, - enum pipe_format dst_format, - const VkRect2D *render_area, - uint32_t gmem_offset, - uint32_t cpp) -{ - /* RB_BIN_CONTROL/GRAS_BIN_CONTROL are normally only set once and they - * aren't set until we know whether we're HW binning or not, and we want to - * avoid a dependence on that here to be able to store attachments before - * the end of the renderpass in the future. Use the scratch space to - * save/restore them dynamically. 
- */ - tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1); - tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_BIN_CONTROL) | - CP_REG_TO_SCRATCH_0_SCRATCH(0) | - CP_REG_TO_SCRATCH_0_CNT(1 - 1)); - - r3d_setup(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, - iview->view.ubwc_enabled, dst_samples); - - r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); - - if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { - if (!separate_stencil) { - r3d_dst_depth(cs, iview, 0); - } else { - r3d_dst_stencil(cs, iview, 0); - } - } else { - r3d_dst(cs, &iview->view, 0, src_format); - } - - r3d_src_gmem(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp); - - /* sync GMEM writes with CACHE. */ - tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); - - /* Wait for CACHE_INVALIDATE to land */ - tu_cs_emit_wfi(cs); - - r3d_run(cmd, cs); - - r3d_teardown(cmd, cs); - - /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to - * sysmem, and we generally assume that GMEM renderpasses leave their - * results in sysmem, so we need to flush manually here. The 3d blit path - * writes to depth images as a color RT, so there's no need to flush depth. - */ - tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); - - /* Restore RB_BIN_CONTROL/GRAS_BIN_CONTROL saved above. */ - tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); - tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_BIN_CONTROL) | - CP_SCRATCH_TO_REG_0_SCRATCH(0) | - CP_SCRATCH_TO_REG_0_CNT(1 - 1)); - - tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); - tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_BIN_CONTROL) | - CP_SCRATCH_TO_REG_0_SCRATCH(0) | - CP_SCRATCH_TO_REG_0_CNT(1 - 1)); -} - -static bool -tu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a) -{ - struct tu_physical_device *phys_dev = cmd->device->physical_device; - const struct tu_image_view *iview = cmd->state.attachments[a]; - const VkRect2D *render_area = &cmd->state.render_area; - - /* Unaligned store is incredibly rare in CTS, we have to force it to test. */ - if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_UNALIGNED_STORE)) - return true; - - uint32_t x1 = render_area->offset.x; - uint32_t y1 = render_area->offset.y; - uint32_t x2 = x1 + render_area->extent.width; - uint32_t y2 = y1 + render_area->extent.height; - /* x2/y2 can be unaligned if equal to the size of the image, since it will - * write into padding space. The one exception is linear levels which don't - * have the required y padding in the layout (except for the last level) - */ - bool need_y2_align = - y2 != iview->view.height || iview->view.need_y2_align; - - return (x1 % phys_dev->info->gmem_align_w || - (x2 % phys_dev->info->gmem_align_w && x2 != iview->view.width) || - y1 % phys_dev->info->gmem_align_h || - (y2 % phys_dev->info->gmem_align_h && need_y2_align)); -} - -/* Choose the GMEM layout (use the CCU space or not) based on whether the - * current attachments will need. This has to happen at vkBeginRenderPass() - * time because tu_attachment_store_unaligned() looks at the image views, which - * are only available at that point. This should match the logic for the - * !unaligned case in tu_store_gmem_attachment(). 
- */ -void -tu_choose_gmem_layout(struct tu_cmd_buffer *cmd) -{ - cmd->state.gmem_layout = TU_GMEM_LAYOUT_FULL; - - for (unsigned i = 0; i < cmd->state.pass->attachment_count; i++) { - if (!cmd->state.attachments[i]) - continue; - - struct tu_render_pass_attachment *att = - &cmd->state.pass->attachments[i]; - if ((att->store || att->store_stencil) && - tu_attachment_store_unaligned(cmd, i)) - cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU; - if (att->will_be_resolved && !blit_can_resolve(att->format)) - cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU; - } - - cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout]; -} - -void -tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t a, - uint32_t gmem_a, - bool cond_exec_allowed) -{ - const VkRect2D *render_area = &cmd->state.render_area; - struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a]; - const struct tu_image_view *iview = cmd->state.attachments[a]; - struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a]; - - if (!dst->store && !dst->store_stencil) - return; - - trace_start_gmem_store(&cmd->trace, cs); - - /* Unconditional store should happen only if attachment was cleared, - * which could have happened either by load_op or via vkCmdClearAttachments. - */ - bool cond_exec = cond_exec_allowed && src->cond_store_allowed; - if (cond_exec) { - tu_begin_load_store_cond_exec(cmd, cs, false); - } - - bool unaligned = tu_attachment_store_unaligned(cmd, a); - - /* D32_SFLOAT_S8_UINT is quite special format: it has two planes, - * one for depth and other for stencil. When resolving a MSAA - * D32_SFLOAT_S8_UINT to S8_UINT, we need to take that into account. - */ - bool resolve_d32s8_s8 = - src->format == VK_FORMAT_D32_SFLOAT_S8_UINT && - dst->format == VK_FORMAT_S8_UINT; - - /* The fast path doesn't support picking out the last component of a D24S8 - * texture reinterpreted as RGBA8_UNORM. - */ - bool resolve_d24s8_s8 = - src->format == VK_FORMAT_D24_UNORM_S8_UINT && - dst->format == VK_FORMAT_S8_UINT; - - bool store_common = dst->store && !resolve_d32s8_s8; - bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8; - - /* use fast path when render area is aligned, except for unsupported resolve cases */ - if (!unaligned && !resolve_d24s8_s8 && - (a == gmem_a || blit_can_resolve(dst->format))) { - if (store_common) - tu_emit_blit(cmd, cs, iview, src, true, false); - if (store_separate_stencil) - tu_emit_blit(cmd, cs, iview, src, true, true); - - if (cond_exec) { - tu_end_load_store_cond_exec(cmd, cs, false); - } - - trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false); - return; - } - - assert(cmd->state.gmem_layout == TU_GMEM_LAYOUT_AVOID_CCU); - - enum pipe_format src_format = tu_vk_format_to_pipe_format(src->format); - if (src_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) - src_format = PIPE_FORMAT_Z32_FLOAT; - - enum pipe_format dst_format = tu_vk_format_to_pipe_format(dst->format); - if (dst_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) - dst_format = PIPE_FORMAT_Z32_FLOAT; - - if (dst->samples > 1) { - /* If we hit this path, we have to disable draw states after every tile - * instead of once at the end of the renderpass, so that they aren't - * executed when calling CP_DRAW. - * - * TODO: store a flag somewhere so we don't do this more than once and - * don't do it after the renderpass when this happens. 
- */ - if (store_common || store_separate_stencil) - tu_disable_draw_states(cmd, cs); - - if (store_common) { - store_3d_blit(cmd, cs, iview, dst->samples, false, src_format, - dst_format, render_area, tu_attachment_gmem_offset(cmd, src), src->cpp); - } - if (store_separate_stencil) { - store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT, - PIPE_FORMAT_S8_UINT, render_area, - tu_attachment_gmem_offset_stencil(cmd, src), src->samples); - } - } else { - r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); - - if (store_common) { - store_cp_blit(cmd, cs, iview, src->samples, false, src_format, - dst_format, tu_attachment_gmem_offset(cmd, src), src->cpp); - } - if (store_separate_stencil) { - store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT, - PIPE_FORMAT_S8_UINT, tu_attachment_gmem_offset_stencil(cmd, src), src->samples); - } - } - - if (cond_exec) { - tu_end_load_store_cond_exec(cmd, cs, false); - } - - trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c b/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c deleted file mode 100644 index fe436e595..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_cmd_buffer.c +++ /dev/null @@ -1,2637 +0,0 @@ -/* - * Copyright © 2016 Red Hat. - * Copyright © 2016 Bas Nieuwenhuizen - * - * based in part on anv driver which is: - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include "registers/adreno_pm4.xml.h" -#include "registers/adreno_common.xml.h" -#include "registers/a6xx.xml.h" - -#include "vk_format.h" - -#include "tu_cs.h" - -void -tu_bo_list_init(struct tu_bo_list *list) -{ - list->count = list->capacity = 0; - list->bo_infos = NULL; -} - -void -tu_bo_list_destroy(struct tu_bo_list *list) -{ - free(list->bo_infos); -} - -void -tu_bo_list_reset(struct tu_bo_list *list) -{ - list->count = 0; -} - -/** - * \a flags consists of MSM_SUBMIT_BO_FLAGS. 
- */ -static uint32_t -tu_bo_list_add_info(struct tu_bo_list *list, - const struct drm_msm_gem_submit_bo *bo_info) -{ - for (uint32_t i = 0; i < list->count; ++i) { - if (list->bo_infos[i].handle == bo_info->handle) { - assert(list->bo_infos[i].presumed == bo_info->presumed); - list->bo_infos[i].flags |= bo_info->flags; - return i; - } - } - - /* grow list->bo_infos if needed */ - if (list->count == list->capacity) { - uint32_t new_capacity = MAX2(2 * list->count, 16); - struct drm_msm_gem_submit_bo *new_bo_infos = realloc( - list->bo_infos, new_capacity * sizeof(struct drm_msm_gem_submit_bo)); - if (!new_bo_infos) - return TU_BO_LIST_FAILED; - list->bo_infos = new_bo_infos; - list->capacity = new_capacity; - } - - list->bo_infos[list->count] = *bo_info; - return list->count++; -} - -uint32_t -tu_bo_list_add(struct tu_bo_list *list, - const struct tu_bo *bo, - uint32_t flags) -{ - return tu_bo_list_add_info(list, &(struct drm_msm_gem_submit_bo) { - .flags = flags, - .handle = bo->gem_handle, - .presumed = bo->iova, - }); -} - -VkResult -tu_bo_list_merge(struct tu_bo_list *list, const struct tu_bo_list *other) -{ - for (uint32_t i = 0; i < other->count; i++) { - if (tu_bo_list_add_info(list, other->bo_infos + i) == TU_BO_LIST_FAILED) - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - return VK_SUCCESS; -} - -static VkResult -tu_tiling_config_update_gmem_layout(struct tu_tiling_config *tiling, - const struct tu_device *dev) -{ - const uint32_t gmem_size = dev->physical_device->gmem_size; - uint32_t offset = 0; - - for (uint32_t i = 0; i < tiling->buffer_count; i++) { - /* 16KB-aligned */ - offset = align(offset, 0x4000); - - tiling->gmem_offsets[i] = offset; - offset += tiling->tile0.extent.width * tiling->tile0.extent.height * - tiling->buffer_cpp[i]; - } - - return offset <= gmem_size ? 
VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY; -} - -static void -tu_tiling_config_update_tile_layout(struct tu_tiling_config *tiling, - const struct tu_device *dev) -{ - const uint32_t tile_align_w = dev->physical_device->tile_align_w; - const uint32_t tile_align_h = dev->physical_device->tile_align_h; - const uint32_t max_tile_width = 1024; /* A6xx */ - - tiling->tile0.offset = (VkOffset2D) { - .x = tiling->render_area.offset.x & ~(tile_align_w - 1), - .y = tiling->render_area.offset.y & ~(tile_align_h - 1), - }; - - const uint32_t ra_width = - tiling->render_area.extent.width + - (tiling->render_area.offset.x - tiling->tile0.offset.x); - const uint32_t ra_height = - tiling->render_area.extent.height + - (tiling->render_area.offset.y - tiling->tile0.offset.y); - - /* start from 1 tile */ - tiling->tile_count = (VkExtent2D) { - .width = 1, - .height = 1, - }; - tiling->tile0.extent = (VkExtent2D) { - .width = align(ra_width, tile_align_w), - .height = align(ra_height, tile_align_h), - }; - - /* do not exceed max tile width */ - while (tiling->tile0.extent.width > max_tile_width) { - tiling->tile_count.width++; - tiling->tile0.extent.width = - align(ra_width / tiling->tile_count.width, tile_align_w); - } - - /* do not exceed gmem size */ - while (tu_tiling_config_update_gmem_layout(tiling, dev) != VK_SUCCESS) { - if (tiling->tile0.extent.width > tiling->tile0.extent.height) { - tiling->tile_count.width++; - tiling->tile0.extent.width = - align(ra_width / tiling->tile_count.width, tile_align_w); - } else { - tiling->tile_count.height++; - tiling->tile0.extent.height = - align(ra_height / tiling->tile_count.height, tile_align_h); - } - } -} - -static void -tu_tiling_config_update_pipe_layout(struct tu_tiling_config *tiling, - const struct tu_device *dev) -{ - const uint32_t max_pipe_count = 32; /* A6xx */ - - /* start from 1 tile per pipe */ - tiling->pipe0 = (VkExtent2D) { - .width = 1, - .height = 1, - }; - tiling->pipe_count = tiling->tile_count; - - /* do not exceed max pipe count vertically */ - while (tiling->pipe_count.height > max_pipe_count) { - tiling->pipe0.height += 2; - tiling->pipe_count.height = - (tiling->tile_count.height + tiling->pipe0.height - 1) / - tiling->pipe0.height; - } - - /* do not exceed max pipe count */ - while (tiling->pipe_count.width * tiling->pipe_count.height > - max_pipe_count) { - tiling->pipe0.width += 1; - tiling->pipe_count.width = - (tiling->tile_count.width + tiling->pipe0.width - 1) / - tiling->pipe0.width; - } -} - -static void -tu_tiling_config_update_pipes(struct tu_tiling_config *tiling, - const struct tu_device *dev) -{ - const uint32_t max_pipe_count = 32; /* A6xx */ - const uint32_t used_pipe_count = - tiling->pipe_count.width * tiling->pipe_count.height; - const VkExtent2D last_pipe = { - .width = tiling->tile_count.width % tiling->pipe0.width, - .height = tiling->tile_count.height % tiling->pipe0.height, - }; - - assert(used_pipe_count <= max_pipe_count); - assert(max_pipe_count <= ARRAY_SIZE(tiling->pipe_config)); - - for (uint32_t y = 0; y < tiling->pipe_count.height; y++) { - for (uint32_t x = 0; x < tiling->pipe_count.width; x++) { - const uint32_t pipe_x = tiling->pipe0.width * x; - const uint32_t pipe_y = tiling->pipe0.height * y; - const uint32_t pipe_w = (x == tiling->pipe_count.width - 1) - ? last_pipe.width - : tiling->pipe0.width; - const uint32_t pipe_h = (y == tiling->pipe_count.height - 1) - ? 
last_pipe.height - : tiling->pipe0.height; - const uint32_t n = tiling->pipe_count.width * y + x; - - tiling->pipe_config[n] = A6XX_VSC_PIPE_CONFIG_REG_X(pipe_x) | - A6XX_VSC_PIPE_CONFIG_REG_Y(pipe_y) | - A6XX_VSC_PIPE_CONFIG_REG_W(pipe_w) | - A6XX_VSC_PIPE_CONFIG_REG_H(pipe_h); - tiling->pipe_sizes[n] = CP_SET_BIN_DATA5_0_VSC_SIZE(pipe_w * pipe_h); - } - } - - memset(tiling->pipe_config + used_pipe_count, 0, - sizeof(uint32_t) * (max_pipe_count - used_pipe_count)); -} - -static void -tu_tiling_config_update(struct tu_tiling_config *tiling, - const struct tu_device *dev, - const uint32_t *buffer_cpp, - uint32_t buffer_count, - const VkRect2D *render_area) -{ - /* see if there is any real change */ - const bool ra_changed = - render_area && - memcmp(&tiling->render_area, render_area, sizeof(*render_area)); - const bool buf_changed = tiling->buffer_count != buffer_count || - memcmp(tiling->buffer_cpp, buffer_cpp, - sizeof(*buffer_cpp) * buffer_count); - if (!ra_changed && !buf_changed) - return; - - if (ra_changed) - tiling->render_area = *render_area; - - if (buf_changed) { - memcpy(tiling->buffer_cpp, buffer_cpp, - sizeof(*buffer_cpp) * buffer_count); - tiling->buffer_count = buffer_count; - } - - tu_tiling_config_update_tile_layout(tiling, dev); - tu_tiling_config_update_pipe_layout(tiling, dev); - tu_tiling_config_update_pipes(tiling, dev); -} - -static void -tu_tiling_config_get_tile(const struct tu_tiling_config *tiling, - const struct tu_device *dev, - uint32_t tx, - uint32_t ty, - struct tu_tile *tile) -{ - /* find the pipe and the slot for tile (tx, ty) */ - const uint32_t px = tx / tiling->pipe0.width; - const uint32_t py = ty / tiling->pipe0.height; - const uint32_t sx = tx - tiling->pipe0.width * px; - const uint32_t sy = ty - tiling->pipe0.height * py; - - assert(tx < tiling->tile_count.width && ty < tiling->tile_count.height); - assert(px < tiling->pipe_count.width && py < tiling->pipe_count.height); - assert(sx < tiling->pipe0.width && sy < tiling->pipe0.height); - - /* convert to 1D indices */ - tile->pipe = tiling->pipe_count.width * py + px; - tile->slot = tiling->pipe0.width * sy + sx; - - /* get the blit area for the tile */ - tile->begin = (VkOffset2D) { - .x = tiling->tile0.offset.x + tiling->tile0.extent.width * tx, - .y = tiling->tile0.offset.y + tiling->tile0.extent.height * ty, - }; - tile->end.x = - (tx == tiling->tile_count.width - 1) - ? tiling->render_area.offset.x + tiling->render_area.extent.width - : tile->begin.x + tiling->tile0.extent.width; - tile->end.y = - (ty == tiling->tile_count.height - 1) - ? 
tiling->render_area.offset.y + tiling->render_area.extent.height - : tile->begin.y + tiling->tile0.extent.height; -} - -static enum a3xx_msaa_samples -tu6_msaa_samples(uint32_t samples) -{ - switch (samples) { - case 1: - return MSAA_ONE; - case 2: - return MSAA_TWO; - case 4: - return MSAA_FOUR; - case 8: - return MSAA_EIGHT; - default: - assert(!"invalid sample count"); - return MSAA_ONE; - } -} - -static enum a4xx_index_size -tu6_index_size(VkIndexType type) -{ - switch (type) { - case VK_INDEX_TYPE_UINT16: - return INDEX4_SIZE_16_BIT; - case VK_INDEX_TYPE_UINT32: - return INDEX4_SIZE_32_BIT; - default: - unreachable("invalid VkIndexType"); - return INDEX4_SIZE_8_BIT; - } -} - -static void -tu6_emit_marker(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu_cs_emit_write_reg(cs, cmd->marker_reg, ++cmd->marker_seqno); -} - -void -tu6_emit_event_write(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - enum vgt_event_type event, - bool need_seqno) -{ - tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, need_seqno ? 4 : 1); - tu_cs_emit(cs, CP_EVENT_WRITE_0_EVENT(event)); - if (need_seqno) { - tu_cs_emit_qw(cs, cmd->scratch_bo.iova); - tu_cs_emit(cs, ++cmd->scratch_seqno); - } -} - -static void -tu6_emit_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu6_emit_event_write(cmd, cs, 0x31, false); -} - -static void -tu6_emit_lrz_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu6_emit_event_write(cmd, cs, LRZ_FLUSH, false); -} - -static void -tu6_emit_wfi(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - if (cmd->wait_for_idle) { - tu_cs_emit_wfi(cs); - cmd->wait_for_idle = false; - } -} - -static void -tu6_emit_zs(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - const struct tu_subpass *subpass = cmd->state.subpass; - - const uint32_t a = subpass->depth_stencil_attachment.attachment; - if (a == VK_ATTACHMENT_UNUSED) { - tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6); - tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE)); - tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */ - tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */ - tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */ - tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ - tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */ - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); - tu_cs_emit(cs, - A6XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE)); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_BUFFER_BASE_LO, 5); - tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ - tu_cs_emit(cs, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ - tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */ - tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */ - tu_cs_emit(cs, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */ - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_INFO, 1); - tu_cs_emit(cs, 0x00000000); /* RB_STENCIL_INFO */ - - return; - } - - /* enable zs? 
*/ -} - -static void -tu6_emit_mrt(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_subpass *subpass = cmd->state.subpass; - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - unsigned char mrt_comp[MAX_RTS] = { 0 }; - unsigned srgb_cntl = 0; - - uint32_t gmem_index = 0; - for (uint32_t i = 0; i < subpass->color_count; ++i) { - uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - const struct tu_image_view *iview = fb->attachments[a].attachment; - const struct tu_image_level *slice = - &iview->image->levels[iview->base_mip]; - const enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - uint32_t stride = 0; - uint32_t offset = 0; - - mrt_comp[i] = 0xf; - - if (vk_format_is_srgb(iview->vk_format)) - srgb_cntl |= (1 << i); - - const struct tu_native_format *format = - tu6_get_native_format(iview->vk_format); - assert(format && format->rb >= 0); - - offset = slice->offset + slice->size * iview->base_layer; - stride = slice->pitch * vk_format_get_blocksize(iview->vk_format); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(i), 6); - tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) | - A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | - A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap)); - tu_cs_emit(cs, A6XX_RB_MRT_PITCH(stride)); - tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(slice->size)); - tu_cs_emit_qw(cs, iview->image->bo->iova + iview->image->bo_offset + - offset); /* BASE_LO/HI */ - tu_cs_emit( - cs, tiling->gmem_offsets[gmem_index++]); /* RB_MRT[i].BASE_GMEM */ - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_MRT_REG(i), 1); - tu_cs_emit(cs, A6XX_SP_FS_MRT_REG_COLOR_FORMAT(format->rb)); - -#if 0 - /* when we support UBWC, these would be the system memory - * addr/pitch/etc: - */ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 4); - tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */ - tu_cs_emit(cs, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */ - tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_PITCH(0)); - tu_cs_emit(cs, A6XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0)); -#endif - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_SRGB_CNTL, 1); - tu_cs_emit(cs, srgb_cntl); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_SRGB_CNTL, 1); - tu_cs_emit(cs, srgb_cntl); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_COMPONENTS, 1); - tu_cs_emit(cs, A6XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | - A6XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | - A6XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | - A6XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | - A6XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | - A6XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | - A6XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | - A6XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_RENDER_COMPONENTS, 1); - tu_cs_emit(cs, A6XX_SP_FS_RENDER_COMPONENTS_RT0(mrt_comp[0]) | - A6XX_SP_FS_RENDER_COMPONENTS_RT1(mrt_comp[1]) | - A6XX_SP_FS_RENDER_COMPONENTS_RT2(mrt_comp[2]) | - A6XX_SP_FS_RENDER_COMPONENTS_RT3(mrt_comp[3]) | - A6XX_SP_FS_RENDER_COMPONENTS_RT4(mrt_comp[4]) | - A6XX_SP_FS_RENDER_COMPONENTS_RT5(mrt_comp[5]) | - A6XX_SP_FS_RENDER_COMPONENTS_RT6(mrt_comp[6]) | - A6XX_SP_FS_RENDER_COMPONENTS_RT7(mrt_comp[7])); -} - -static void -tu6_emit_msaa(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - const struct tu_subpass *subpass = cmd->state.subpass; - const enum a3xx_msaa_samples samples = - tu6_msaa_samples(subpass->max_sample_count); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2); - tu_cs_emit(cs, 
A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples)); - tu_cs_emit( - cs, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) | - ((samples == MSAA_ONE) ? A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE - : 0)); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2); - tu_cs_emit(cs, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples)); - tu_cs_emit( - cs, - A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) | - ((samples == MSAA_ONE) ? A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE : 0)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_RAS_MSAA_CNTL, 2); - tu_cs_emit(cs, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples)); - tu_cs_emit( - cs, - A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) | - ((samples == MSAA_ONE) ? A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE : 0)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MSAA_CNTL, 1); - tu_cs_emit(cs, A6XX_RB_MSAA_CNTL_SAMPLES(samples)); -} - -static void -tu6_emit_bin_size(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t flags) -{ - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - const uint32_t bin_w = tiling->tile0.extent.width; - const uint32_t bin_h = tiling->tile0.extent.height; - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_BIN_CONTROL, 1); - tu_cs_emit(cs, A6XX_GRAS_BIN_CONTROL_BINW(bin_w) | - A6XX_GRAS_BIN_CONTROL_BINH(bin_h) | flags); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL, 1); - tu_cs_emit(cs, A6XX_RB_BIN_CONTROL_BINW(bin_w) | - A6XX_RB_BIN_CONTROL_BINH(bin_h) | flags); - - /* no flag for RB_BIN_CONTROL2... */ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BIN_CONTROL2, 1); - tu_cs_emit(cs, A6XX_RB_BIN_CONTROL2_BINW(bin_w) | - A6XX_RB_BIN_CONTROL2_BINH(bin_h)); -} - -static void -tu6_emit_render_cntl(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - bool binning) -{ - uint32_t cntl = 0; - cntl |= A6XX_RB_RENDER_CNTL_UNK4; - if (binning) - cntl |= A6XX_RB_RENDER_CNTL_BINNING; - - tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); - tu_cs_emit(cs, 0x2); - tu_cs_emit(cs, REG_A6XX_RB_RENDER_CNTL); - tu_cs_emit(cs, cntl); -} - -static void -tu6_emit_blit_scissor(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - const VkRect2D *render_area = &cmd->state.tiling_config.render_area; - const uint32_t x1 = render_area->offset.x; - const uint32_t y1 = render_area->offset.y; - const uint32_t x2 = x1 + render_area->extent.width - 1; - const uint32_t y2 = y1 + render_area->extent.height - 1; - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); - tu_cs_emit(cs, - A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); - tu_cs_emit(cs, - A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); -} - -static void -tu6_emit_blit_info(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t gmem_offset, - uint32_t blit_info) -{ - const struct tu_image_level *slice = - &iview->image->levels[iview->base_mip]; - const uint32_t offset = slice->offset + slice->size * iview->base_layer; - const uint32_t stride = - slice->pitch * vk_format_get_blocksize(iview->vk_format); - const enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - const enum a3xx_msaa_samples samples = tu6_msaa_samples(1); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); - tu_cs_emit(cs, blit_info); - - /* tile mode? 
*/ - const struct tu_native_format *format = - tu6_get_native_format(iview->vk_format); - assert(format && format->rb >= 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 5); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) | - A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) | - A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(format->swap)); - tu_cs_emit_qw(cs, - iview->image->bo->iova + iview->image->bo_offset + offset); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(stride)); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(slice->size)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - tu_cs_emit(cs, gmem_offset); -} - -static void -tu6_emit_blit_clear(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t gmem_offset, - const VkClearValue *clear_value) -{ - const enum a6xx_tile_mode tile_mode = TILE6_LINEAR; - const enum a3xx_msaa_samples samples = tu6_msaa_samples(1); - - const struct tu_native_format *format = - tu6_get_native_format(iview->vk_format); - assert(format && format->rb >= 0); - /* must be WZYX; other values are ignored */ - const enum a3xx_color_swap swap = WZYX; - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1); - tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_TILE_MODE(tile_mode) | - A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) | - A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(format->rb) | - A6XX_RB_BLIT_DST_INFO_COLOR_SWAP(swap)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_INFO, 1); - tu_cs_emit(cs, A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); - tu_cs_emit(cs, gmem_offset); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); - tu_cs_emit(cs, 0); - - /* pack clear_value into WZYX order */ - uint32_t clear_vals[4] = { 0 }; - tu_pack_clear_value(clear_value, iview->vk_format, clear_vals); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); - tu_cs_emit(cs, clear_vals[0]); - tu_cs_emit(cs, clear_vals[1]); - tu_cs_emit(cs, clear_vals[2]); - tu_cs_emit(cs, clear_vals[3]); -} - -static void -tu6_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu6_emit_marker(cmd, cs); - tu6_emit_event_write(cmd, cs, BLIT, false); - tu6_emit_marker(cmd, cs); -} - -static void -tu6_emit_window_scissor(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t x1, - uint32_t y1, - uint32_t x2, - uint32_t y2) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); - tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) | - A6XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1)); - tu_cs_emit(cs, A6XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) | - A6XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2)); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_RESOLVE_CNTL_1, 2); - tu_cs_emit( - cs, A6XX_GRAS_RESOLVE_CNTL_1_X(x1) | A6XX_GRAS_RESOLVE_CNTL_1_Y(y1)); - tu_cs_emit( - cs, A6XX_GRAS_RESOLVE_CNTL_2_X(x2) | A6XX_GRAS_RESOLVE_CNTL_2_Y(y2)); -} - -static void -tu6_emit_window_offset(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - uint32_t x1, - uint32_t y1) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1); - tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1)); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET2, 1); - tu_cs_emit(cs, - A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1)); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_WINDOW_OFFSET, 1); - tu_cs_emit(cs, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1)); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1); - tu_cs_emit( - cs, A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1)); -} - 
-static void -tu6_emit_tile_select(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_tile *tile) -{ - tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1); - tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(0x7)); - - tu6_emit_marker(cmd, cs); - tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1); - tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM) | 0x10); - tu6_emit_marker(cmd, cs); - - const uint32_t x1 = tile->begin.x; - const uint32_t y1 = tile->begin.y; - const uint32_t x2 = tile->end.x - 1; - const uint32_t y2 = tile->end.y - 1; - tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2); - tu6_emit_window_offset(cmd, cs, x1, y1); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_OVERRIDE, 1); - tu_cs_emit(cs, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); - - if (false) { - /* hw binning? */ - } else { - tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1); - tu_cs_emit(cs, 0x1); - - tu_cs_emit_pkt7(cs, CP_SET_MODE, 1); - tu_cs_emit(cs, 0x0); - } -} - -static void -tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_subpass *subpass = cmd->state.subpass; - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - const struct tu_attachment_state *attachments = cmd->state.attachments; - - tu6_emit_blit_scissor(cmd, cs); - - uint32_t gmem_index = 0; - for (uint32_t i = 0; i < subpass->color_count; ++i) { - const uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - const struct tu_image_view *iview = fb->attachments[a].attachment; - const struct tu_attachment_state *att = attachments + a; - if (att->pending_clear_aspects) { - assert(att->pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT); - tu6_emit_blit_clear(cmd, cs, iview, - tiling->gmem_offsets[gmem_index++], - &att->clear_value); - } else { - tu6_emit_blit_info(cmd, cs, iview, - tiling->gmem_offsets[gmem_index++], - A6XX_RB_BLIT_INFO_UNK0 | A6XX_RB_BLIT_INFO_GMEM); - } - - tu6_emit_blit(cmd, cs); - } - - /* load/clear zs? */ -} - -static void -tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - const struct tu_framebuffer *fb = cmd->state.framebuffer; - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - - if (false) { - /* hw binning? 
*/ - } - - tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3); - tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) | - CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE__0_GROUP_ID(0)); - tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0)); - tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0)); - - tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1); - tu_cs_emit(cs, 0x0); - - tu6_emit_marker(cmd, cs); - tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1); - tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE) | 0x10); - tu6_emit_marker(cmd, cs); - - tu6_emit_blit_scissor(cmd, cs); - - uint32_t gmem_index = 0; - for (uint32_t i = 0; i < cmd->state.subpass->color_count; ++i) { - uint32_t a = cmd->state.subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - const struct tu_image_view *iview = fb->attachments[a].attachment; - tu6_emit_blit_info(cmd, cs, iview, tiling->gmem_offsets[gmem_index++], - 0); - tu6_emit_blit(cmd, cs); - } -} - -static void -tu6_emit_restart_index(struct tu_cs *cs, uint32_t restart_index) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_PC_RESTART_INDEX, 1); - tu_cs_emit(cs, restart_index); -} - -static void -tu6_init_hw(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - VkResult result = tu_cs_reserve_space(cmd->device, cs, 256); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - tu6_emit_cache_flush(cmd, cs); - - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 0xfffff); - - tu_cs_emit_write_reg(cs, REG_A6XX_RB_CCU_CNTL, 0x7c400004); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E04, 0x00100000); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE04, 0x8); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE00, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE0F, 0x3f); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B605, 0x44); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B600, 0x100000); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE00, 0x80); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE01, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9600, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8600, 0x880); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BE04, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AE03, 0x00000410); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_IBO_COUNT, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B182, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_UNKNOWN_BB11, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_UNKNOWN_0E12, 0x3200000); - tu_cs_emit_write_reg(cs, REG_A6XX_UCHE_CLIENT_PF, 4); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8E01, 0x0); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_AB00, 0x5); - tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A009, 0x00000001); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8811, 0x00000010); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x1f); - - tu_cs_emit_write_reg(cs, REG_A6XX_RB_SRGB_CNTL, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8101, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8110, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL0, 0x401); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_RENDER_CONTROL1, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_SAMPLE_CNTL, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8818, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8819, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881A, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881B, 0); - 
tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881C, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881D, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_881E, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_88F0, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9101, 0xffff00); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9107, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9236, 1); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9300, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_SO_OVERRIDE, - A6XX_VPC_SO_OVERRIDE_SO_DISABLE); - - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9801, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9980, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9B06, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_A81B, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_SP_UNKNOWN_B183, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_8099, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_809B, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A0, 2); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80AF, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9210, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9211, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9602, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9981, 0x3); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9E72, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VPC_UNKNOWN_9108, 0x3); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B304, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_SP_TP_UNKNOWN_B309, 0x000000a2); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8804, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A4, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A5, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_UNKNOWN_80A6, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8805, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8806, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8878, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_RB_UNKNOWN_8879, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_HLSQ_CONTROL_5_REG, 0xfc); - - tu6_emit_marker(cmd, cs); - - tu_cs_emit_write_reg(cs, REG_A6XX_VFD_MODE_CNTL, 0x00000000); - - tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0); - - tu_cs_emit_write_reg(cs, REG_A6XX_PC_MODE_CNTL, 0x0000001f); - - /* we don't use this yet.. probably best to disable.. 
*/ - tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3); - tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) | - CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | - CP_SET_DRAW_STATE__0_GROUP_ID(0)); - tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0)); - tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0)); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(0), 3); - tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */ - tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */ - tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */ - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_FLUSH_BASE_LO(0), 2); - tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */ - tu_cs_emit(cs, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */ - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUF_CNTL, 1); - tu_cs_emit(cs, 0x00000000); /* VPC_SO_BUF_CNTL */ - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(0), 1); - tu_cs_emit(cs, 0x00000000); /* UNKNOWN_E2AB */ - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_BASE_LO(1), 3); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(1), 6); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(2), 6); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_SO_BUFFER_OFFSET(3), 3); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0x00000000); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CTRL_REG0, 1); - tu_cs_emit(cs, 0x00000000); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CTRL_REG0, 1); - tu_cs_emit(cs, 0x00000000); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1); - tu_cs_emit(cs, 0x00000000); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_LRZ_CNTL, 1); - tu_cs_emit(cs, 0x00000000); - - tu_cs_sanity_check(cs); -} - -static void -tu6_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - VkResult result = tu_cs_reserve_space(cmd->device, cs, 256); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - tu6_emit_lrz_flush(cmd, cs); - - /* lrz clear? */ - - tu6_emit_cache_flush(cmd, cs); - - tu_cs_emit_pkt7(cs, CP_SKIP_IB2_ENABLE_GLOBAL, 1); - tu_cs_emit(cs, 0x0); - - /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ - tu6_emit_wfi(cmd, cs); - tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1); - tu_cs_emit(cs, 0x7c400004); /* RB_CCU_CNTL */ - - tu6_emit_zs(cmd, cs); - tu6_emit_mrt(cmd, cs); - tu6_emit_msaa(cmd, cs); - - if (false) { - /* hw binning? 
*/ - } else { - tu6_emit_bin_size(cmd, cs, 0x6000000); - /* no draws */ - } - - tu6_emit_render_cntl(cmd, cs, false); - - tu_cs_sanity_check(cs); -} - -static void -tu6_render_tile(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_tile *tile) -{ - const uint32_t render_tile_space = 64 + tu_cs_get_call_size(&cmd->draw_cs); - VkResult result = tu_cs_reserve_space(cmd->device, cs, render_tile_space); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - tu6_emit_tile_select(cmd, cs, tile); - tu_cs_emit_ib(cs, &cmd->state.tile_load_ib); - - tu_cs_emit_call(cs, &cmd->draw_cs); - cmd->wait_for_idle = true; - - tu_cs_emit_ib(cs, &cmd->state.tile_store_ib); - - tu_cs_sanity_check(cs); -} - -static void -tu6_render_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - VkResult result = tu_cs_reserve_space(cmd->device, cs, 16); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_CNTL, 1); - tu_cs_emit(cs, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_UNK3); - - tu6_emit_lrz_flush(cmd, cs); - - tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true); - - tu_cs_sanity_check(cs); -} - -static void -tu_cmd_render_tiles(struct tu_cmd_buffer *cmd) -{ - const struct tu_tiling_config *tiling = &cmd->state.tiling_config; - - tu6_render_begin(cmd, &cmd->cs); - - for (uint32_t y = 0; y < tiling->tile_count.height; y++) { - for (uint32_t x = 0; x < tiling->tile_count.width; x++) { - struct tu_tile tile; - tu_tiling_config_get_tile(tiling, cmd->device, x, y, &tile); - tu6_render_tile(cmd, &cmd->cs, &tile); - } - } - - tu6_render_end(cmd, &cmd->cs); -} - -static void -tu_cmd_prepare_tile_load_ib(struct tu_cmd_buffer *cmd) -{ - const uint32_t tile_load_space = 16 + 32 * MAX_RTS; - const struct tu_subpass *subpass = cmd->state.subpass; - struct tu_attachment_state *attachments = cmd->state.attachments; - struct tu_cs sub_cs; - - VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs, - tile_load_space, &sub_cs); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - /* emit to tile-load sub_cs */ - tu6_emit_tile_load(cmd, &sub_cs); - - cmd->state.tile_load_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs); - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - const uint32_t a = subpass->color_attachments[i].attachment; - if (a != VK_ATTACHMENT_UNUSED) - attachments[a].pending_clear_aspects = 0; - } -} - -static void -tu_cmd_prepare_tile_store_ib(struct tu_cmd_buffer *cmd) -{ - const uint32_t tile_store_space = 32 + 32 * MAX_RTS; - struct tu_cs sub_cs; - - VkResult result = tu_cs_begin_sub_stream(cmd->device, &cmd->tile_cs, - tile_store_space, &sub_cs); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - /* emit to tile-store sub_cs */ - tu6_emit_tile_store(cmd, &sub_cs); - - cmd->state.tile_store_ib = tu_cs_end_sub_stream(&cmd->tile_cs, &sub_cs); -} - -static void -tu_cmd_update_tiling_config(struct tu_cmd_buffer *cmd, - const VkRect2D *render_area) -{ - const struct tu_device *dev = cmd->device; - const struct tu_render_pass *pass = cmd->state.pass; - const struct tu_subpass *subpass = cmd->state.subpass; - struct tu_tiling_config *tiling = &cmd->state.tiling_config; - - uint32_t buffer_cpp[MAX_RTS + 2]; - uint32_t buffer_count = 0; - - for (uint32_t i = 0; i < subpass->color_count; ++i) { - const uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - const struct tu_render_pass_attachment *att = 
&pass->attachments[a]; - buffer_cpp[buffer_count++] = - vk_format_get_blocksize(att->format) * att->samples; - } - - if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { - const uint32_t a = subpass->depth_stencil_attachment.attachment; - const struct tu_render_pass_attachment *att = &pass->attachments[a]; - - /* TODO */ - assert(att->format != VK_FORMAT_D32_SFLOAT_S8_UINT); - - buffer_cpp[buffer_count++] = - vk_format_get_blocksize(att->format) * att->samples; - } - - tu_tiling_config_update(tiling, dev, buffer_cpp, buffer_count, - render_area); -} - -const struct tu_dynamic_state default_dynamic_state = { - .viewport = - { - .count = 0, - }, - .scissor = - { - .count = 0, - }, - .line_width = 1.0f, - .depth_bias = - { - .bias = 0.0f, - .clamp = 0.0f, - .slope = 0.0f, - }, - .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f }, - .depth_bounds = - { - .min = 0.0f, - .max = 1.0f, - }, - .stencil_compare_mask = - { - .front = ~0u, - .back = ~0u, - }, - .stencil_write_mask = - { - .front = ~0u, - .back = ~0u, - }, - .stencil_reference = - { - .front = 0u, - .back = 0u, - }, -}; - -static void UNUSED /* FINISHME */ -tu_bind_dynamic_state(struct tu_cmd_buffer *cmd_buffer, - const struct tu_dynamic_state *src) -{ - struct tu_dynamic_state *dest = &cmd_buffer->state.dynamic; - uint32_t copy_mask = src->mask; - uint32_t dest_mask = 0; - - tu_use_args(cmd_buffer); /* FINISHME */ - - /* Make sure to copy the number of viewports/scissors because they can - * only be specified at pipeline creation time. - */ - dest->viewport.count = src->viewport.count; - dest->scissor.count = src->scissor.count; - dest->discard_rectangle.count = src->discard_rectangle.count; - - if (copy_mask & TU_DYNAMIC_VIEWPORT) { - if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, - src->viewport.count * sizeof(VkViewport))) { - typed_memcpy(dest->viewport.viewports, src->viewport.viewports, - src->viewport.count); - dest_mask |= TU_DYNAMIC_VIEWPORT; - } - } - - if (copy_mask & TU_DYNAMIC_SCISSOR) { - if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, - src->scissor.count * sizeof(VkRect2D))) { - typed_memcpy(dest->scissor.scissors, src->scissor.scissors, - src->scissor.count); - dest_mask |= TU_DYNAMIC_SCISSOR; - } - } - - if (copy_mask & TU_DYNAMIC_LINE_WIDTH) { - if (dest->line_width != src->line_width) { - dest->line_width = src->line_width; - dest_mask |= TU_DYNAMIC_LINE_WIDTH; - } - } - - if (copy_mask & TU_DYNAMIC_DEPTH_BIAS) { - if (memcmp(&dest->depth_bias, &src->depth_bias, - sizeof(src->depth_bias))) { - dest->depth_bias = src->depth_bias; - dest_mask |= TU_DYNAMIC_DEPTH_BIAS; - } - } - - if (copy_mask & TU_DYNAMIC_BLEND_CONSTANTS) { - if (memcmp(&dest->blend_constants, &src->blend_constants, - sizeof(src->blend_constants))) { - typed_memcpy(dest->blend_constants, src->blend_constants, 4); - dest_mask |= TU_DYNAMIC_BLEND_CONSTANTS; - } - } - - if (copy_mask & TU_DYNAMIC_DEPTH_BOUNDS) { - if (memcmp(&dest->depth_bounds, &src->depth_bounds, - sizeof(src->depth_bounds))) { - dest->depth_bounds = src->depth_bounds; - dest_mask |= TU_DYNAMIC_DEPTH_BOUNDS; - } - } - - if (copy_mask & TU_DYNAMIC_STENCIL_COMPARE_MASK) { - if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask, - sizeof(src->stencil_compare_mask))) { - dest->stencil_compare_mask = src->stencil_compare_mask; - dest_mask |= TU_DYNAMIC_STENCIL_COMPARE_MASK; - } - } - - if (copy_mask & TU_DYNAMIC_STENCIL_WRITE_MASK) { - if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, - 
sizeof(src->stencil_write_mask))) { - dest->stencil_write_mask = src->stencil_write_mask; - dest_mask |= TU_DYNAMIC_STENCIL_WRITE_MASK; - } - } - - if (copy_mask & TU_DYNAMIC_STENCIL_REFERENCE) { - if (memcmp(&dest->stencil_reference, &src->stencil_reference, - sizeof(src->stencil_reference))) { - dest->stencil_reference = src->stencil_reference; - dest_mask |= TU_DYNAMIC_STENCIL_REFERENCE; - } - } - - if (copy_mask & TU_DYNAMIC_DISCARD_RECTANGLE) { - if (memcmp(&dest->discard_rectangle.rectangles, - &src->discard_rectangle.rectangles, - src->discard_rectangle.count * sizeof(VkRect2D))) { - typed_memcpy(dest->discard_rectangle.rectangles, - src->discard_rectangle.rectangles, - src->discard_rectangle.count); - dest_mask |= TU_DYNAMIC_DISCARD_RECTANGLE; - } - } -} - -static VkResult -tu_create_cmd_buffer(struct tu_device *device, - struct tu_cmd_pool *pool, - VkCommandBufferLevel level, - VkCommandBuffer *pCommandBuffer) -{ - struct tu_cmd_buffer *cmd_buffer; - cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (cmd_buffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - cmd_buffer->device = device; - cmd_buffer->pool = pool; - cmd_buffer->level = level; - - if (pool) { - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - cmd_buffer->queue_family_index = pool->queue_family_index; - - } else { - /* Init the pool_link so we can safely call list_del when we destroy - * the command buffer - */ - list_inithead(&cmd_buffer->pool_link); - cmd_buffer->queue_family_index = TU_QUEUE_GENERAL; - } - - tu_bo_list_init(&cmd_buffer->bo_list); - tu_cs_init(&cmd_buffer->cs, TU_CS_MODE_GROW, 4096); - tu_cs_init(&cmd_buffer->draw_cs, TU_CS_MODE_GROW, 4096); - tu_cs_init(&cmd_buffer->tile_cs, TU_CS_MODE_SUB_STREAM, 1024); - - *pCommandBuffer = tu_cmd_buffer_to_handle(cmd_buffer); - - list_inithead(&cmd_buffer->upload.list); - - cmd_buffer->marker_reg = REG_A6XX_CP_SCRATCH_REG( - cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY ? 
7 : 6); - - VkResult result = tu_bo_init_new(device, &cmd_buffer->scratch_bo, 0x1000); - if (result != VK_SUCCESS) - return result; - - return VK_SUCCESS; -} - -static void -tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer) -{ - tu_bo_finish(cmd_buffer->device, &cmd_buffer->scratch_bo); - - list_del(&cmd_buffer->pool_link); - - for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) - free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr); - - tu_cs_finish(cmd_buffer->device, &cmd_buffer->cs); - tu_cs_finish(cmd_buffer->device, &cmd_buffer->draw_cs); - tu_cs_finish(cmd_buffer->device, &cmd_buffer->tile_cs); - - tu_bo_list_destroy(&cmd_buffer->bo_list); - vk_free(&cmd_buffer->pool->alloc, cmd_buffer); -} - -static VkResult -tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer) -{ - cmd_buffer->wait_for_idle = true; - - cmd_buffer->record_result = VK_SUCCESS; - - tu_bo_list_reset(&cmd_buffer->bo_list); - tu_cs_reset(cmd_buffer->device, &cmd_buffer->cs); - tu_cs_reset(cmd_buffer->device, &cmd_buffer->draw_cs); - tu_cs_reset(cmd_buffer->device, &cmd_buffer->tile_cs); - - for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) { - cmd_buffer->descriptors[i].dirty = 0; - cmd_buffer->descriptors[i].valid = 0; - cmd_buffer->descriptors[i].push_dirty = false; - } - - cmd_buffer->status = TU_CMD_BUFFER_STATUS_INITIAL; - - return cmd_buffer->record_result; -} - -static VkResult -tu_cmd_state_setup_attachments(struct tu_cmd_buffer *cmd_buffer, - const VkRenderPassBeginInfo *info) -{ - struct tu_cmd_state *state = &cmd_buffer->state; - const struct tu_framebuffer *fb = state->framebuffer; - const struct tu_render_pass *pass = state->pass; - - for (uint32_t i = 0; i < fb->attachment_count; ++i) { - const struct tu_image_view *iview = fb->attachments[i].attachment; - tu_bo_list_add(&cmd_buffer->bo_list, iview->image->bo, - MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE); - } - - if (pass->attachment_count == 0) { - state->attachments = NULL; - return VK_SUCCESS; - } - - state->attachments = - vk_alloc(&cmd_buffer->pool->alloc, - pass->attachment_count * sizeof(state->attachments[0]), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (state->attachments == NULL) { - cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; - return cmd_buffer->record_result; - } - - for (uint32_t i = 0; i < pass->attachment_count; ++i) { - const struct tu_render_pass_attachment *att = &pass->attachments[i]; - VkImageAspectFlags att_aspects = vk_format_aspects(att->format); - VkImageAspectFlags clear_aspects = 0; - - if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { - /* color attachment */ - if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; - } - } else { - /* depthstencil attachment */ - if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && - att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; - if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && - att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { - clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; - } - } - - state->attachments[i].pending_clear_aspects = clear_aspects; - state->attachments[i].cleared_views = 0; - if (clear_aspects && info) { - assert(info->clearValueCount > i); - state->attachments[i].clear_value = info->pClearValues[i]; - } - - state->attachments[i].current_layout = att->initial_layout; - } - - 
return VK_SUCCESS; -} - -VkResult -tu_AllocateCommandBuffers(VkDevice _device, - const VkCommandBufferAllocateInfo *pAllocateInfo, - VkCommandBuffer *pCommandBuffers) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_cmd_pool, pool, pAllocateInfo->commandPool); - - VkResult result = VK_SUCCESS; - uint32_t i; - - for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { - - if (!list_empty(&pool->free_cmd_buffers)) { - struct tu_cmd_buffer *cmd_buffer = list_first_entry( - &pool->free_cmd_buffers, struct tu_cmd_buffer, pool_link); - - list_del(&cmd_buffer->pool_link); - list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); - - result = tu_reset_cmd_buffer(cmd_buffer); - cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - cmd_buffer->level = pAllocateInfo->level; - - pCommandBuffers[i] = tu_cmd_buffer_to_handle(cmd_buffer); - } else { - result = tu_create_cmd_buffer(device, pool, pAllocateInfo->level, - &pCommandBuffers[i]); - } - if (result != VK_SUCCESS) - break; - } - - if (result != VK_SUCCESS) { - tu_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, - pCommandBuffers); - - /* From the Vulkan 1.0.66 spec: - * - * "vkAllocateCommandBuffers can be used to create multiple - * command buffers. If the creation of any of those command - * buffers fails, the implementation must destroy all - * successfully created command buffer objects from this - * command, set all entries of the pCommandBuffers array to - * NULL and return the error." - */ - memset(pCommandBuffers, 0, - sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount); - } - - return result; -} - -void -tu_FreeCommandBuffers(VkDevice device, - VkCommandPool commandPool, - uint32_t commandBufferCount, - const VkCommandBuffer *pCommandBuffers) -{ - for (uint32_t i = 0; i < commandBufferCount; i++) { - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, pCommandBuffers[i]); - - if (cmd_buffer) { - if (cmd_buffer->pool) { - list_del(&cmd_buffer->pool_link); - list_addtail(&cmd_buffer->pool_link, - &cmd_buffer->pool->free_cmd_buffers); - } else - tu_cmd_buffer_destroy(cmd_buffer); - } - } -} - -VkResult -tu_ResetCommandBuffer(VkCommandBuffer commandBuffer, - VkCommandBufferResetFlags flags) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - return tu_reset_cmd_buffer(cmd_buffer); -} - -VkResult -tu_BeginCommandBuffer(VkCommandBuffer commandBuffer, - const VkCommandBufferBeginInfo *pBeginInfo) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - VkResult result = VK_SUCCESS; - - if (cmd_buffer->status != TU_CMD_BUFFER_STATUS_INITIAL) { - /* If the command buffer has already been resetted with - * vkResetCommandBuffer, no need to do it again. 
- */ - result = tu_reset_cmd_buffer(cmd_buffer); - if (result != VK_SUCCESS) - return result; - } - - memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); - cmd_buffer->usage_flags = pBeginInfo->flags; - - tu_cs_begin(&cmd_buffer->cs); - - cmd_buffer->marker_seqno = 0; - cmd_buffer->scratch_seqno = 0; - - /* setup initial configuration into command buffer */ - if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { - switch (cmd_buffer->queue_family_index) { - case TU_QUEUE_GENERAL: - tu6_init_hw(cmd_buffer, &cmd_buffer->cs); - break; - default: - break; - } - } - - cmd_buffer->status = TU_CMD_BUFFER_STATUS_RECORDING; - - return VK_SUCCESS; -} - -void -tu_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, - uint32_t firstBinding, - uint32_t bindingCount, - const VkBuffer *pBuffers, - const VkDeviceSize *pOffsets) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - - assert(firstBinding + bindingCount <= MAX_VBS); - - for (uint32_t i = 0; i < bindingCount; i++) { - cmd->state.vb.buffers[firstBinding + i] = - tu_buffer_from_handle(pBuffers[i]); - cmd->state.vb.offsets[firstBinding + i] = pOffsets[i]; - } - - /* VB states depend on VkPipelineVertexInputStateCreateInfo */ - cmd->state.dirty |= TU_CMD_DIRTY_VERTEX_BUFFERS; -} - -void -tu_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, - VkBuffer buffer, - VkDeviceSize offset, - VkIndexType indexType) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_buffer, buf, buffer); - - /* initialize/update the restart index */ - if (!cmd->state.index_buffer || cmd->state.index_type != indexType) { - struct tu_cs *draw_cs = &cmd->draw_cs; - VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 2); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - tu6_emit_restart_index( - draw_cs, indexType == VK_INDEX_TYPE_UINT32 ? 
0xffffffff : 0xffff); - - tu_cs_sanity_check(draw_cs); - } - - /* track the BO */ - if (cmd->state.index_buffer != buf) - tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); - - cmd->state.index_buffer = buf; - cmd->state.index_offset = offset; - cmd->state.index_type = indexType; -} - -void -tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipelineLayout _layout, - uint32_t firstSet, - uint32_t descriptorSetCount, - const VkDescriptorSet *pDescriptorSets, - uint32_t dynamicOffsetCount, - const uint32_t *pDynamicOffsets) -{ -} - -void -tu_CmdPushConstants(VkCommandBuffer commandBuffer, - VkPipelineLayout layout, - VkShaderStageFlags stageFlags, - uint32_t offset, - uint32_t size, - const void *pValues) -{ -} - -VkResult -tu_EndCommandBuffer(VkCommandBuffer commandBuffer) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - - if (cmd_buffer->scratch_seqno) { - tu_bo_list_add(&cmd_buffer->bo_list, &cmd_buffer->scratch_bo, - MSM_SUBMIT_BO_WRITE); - } - - for (uint32_t i = 0; i < cmd_buffer->draw_cs.bo_count; i++) { - tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->draw_cs.bos[i], - MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); - } - - for (uint32_t i = 0; i < cmd_buffer->tile_cs.bo_count; i++) { - tu_bo_list_add(&cmd_buffer->bo_list, cmd_buffer->tile_cs.bos[i], - MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); - } - - tu_cs_end(&cmd_buffer->cs); - - assert(!cmd_buffer->state.attachments); - - cmd_buffer->status = TU_CMD_BUFFER_STATUS_EXECUTABLE; - - return cmd_buffer->record_result; -} - -void -tu_CmdBindPipeline(VkCommandBuffer commandBuffer, - VkPipelineBindPoint pipelineBindPoint, - VkPipeline _pipeline) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline); - - switch (pipelineBindPoint) { - case VK_PIPELINE_BIND_POINT_GRAPHICS: - cmd->state.pipeline = pipeline; - cmd->state.dirty |= TU_CMD_DIRTY_PIPELINE; - break; - case VK_PIPELINE_BIND_POINT_COMPUTE: - tu_finishme("binding compute pipeline"); - break; - default: - unreachable("unrecognized pipeline bind point"); - break; - } -} - -void -tu_CmdSetViewport(VkCommandBuffer commandBuffer, - uint32_t firstViewport, - uint32_t viewportCount, - const VkViewport *pViewports) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs *draw_cs = &cmd->draw_cs; - - VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 12); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - assert(firstViewport == 0 && viewportCount == 1); - tu6_emit_viewport(draw_cs, pViewports); - - tu_cs_sanity_check(draw_cs); -} - -void -tu_CmdSetScissor(VkCommandBuffer commandBuffer, - uint32_t firstScissor, - uint32_t scissorCount, - const VkRect2D *pScissors) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs *draw_cs = &cmd->draw_cs; - - VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 3); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - assert(firstScissor == 0 && scissorCount == 1); - tu6_emit_scissor(draw_cs, pScissors); - - tu_cs_sanity_check(draw_cs); -} - -void -tu_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - - cmd->state.dynamic.line_width = lineWidth; - - /* line width depends on VkPipelineRasterizationStateCreateInfo */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH; -} - -void -tu_CmdSetDepthBias(VkCommandBuffer commandBuffer, - float depthBiasConstantFactor, - 
float depthBiasClamp, - float depthBiasSlopeFactor) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs *draw_cs = &cmd->draw_cs; - - VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 4); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - tu6_emit_depth_bias(draw_cs, depthBiasConstantFactor, depthBiasClamp, - depthBiasSlopeFactor); - - tu_cs_sanity_check(draw_cs); -} - -void -tu_CmdSetBlendConstants(VkCommandBuffer commandBuffer, - const float blendConstants[4]) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - struct tu_cs *draw_cs = &cmd->draw_cs; - - VkResult result = tu_cs_reserve_space(cmd->device, draw_cs, 5); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - tu6_emit_blend_constants(draw_cs, blendConstants); - - tu_cs_sanity_check(draw_cs); -} - -void -tu_CmdSetDepthBounds(VkCommandBuffer commandBuffer, - float minDepthBounds, - float maxDepthBounds) -{ -} - -void -tu_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t compareMask) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd->state.dynamic.stencil_compare_mask.front = compareMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd->state.dynamic.stencil_compare_mask.back = compareMask; - - /* the front/back compare masks must be updated together */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK; -} - -void -tu_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t writeMask) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd->state.dynamic.stencil_write_mask.front = writeMask; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd->state.dynamic.stencil_write_mask.back = writeMask; - - /* the front/back write masks must be updated together */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK; -} - -void -tu_CmdSetStencilReference(VkCommandBuffer commandBuffer, - VkStencilFaceFlags faceMask, - uint32_t reference) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - - if (faceMask & VK_STENCIL_FACE_FRONT_BIT) - cmd->state.dynamic.stencil_reference.front = reference; - if (faceMask & VK_STENCIL_FACE_BACK_BIT) - cmd->state.dynamic.stencil_reference.back = reference; - - /* the front/back references must be updated together */ - cmd->state.dirty |= TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE; -} - -void -tu_CmdExecuteCommands(VkCommandBuffer commandBuffer, - uint32_t commandBufferCount, - const VkCommandBuffer *pCmdBuffers) -{ -} - -VkResult -tu_CreateCommandPool(VkDevice _device, - const VkCommandPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkCommandPool *pCmdPool) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_cmd_pool *pool; - - pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pool == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - if (pAllocator) - pool->alloc = *pAllocator; - else - pool->alloc = device->alloc; - - list_inithead(&pool->cmd_buffers); - list_inithead(&pool->free_cmd_buffers); - - pool->queue_family_index = pCreateInfo->queueFamilyIndex; - - *pCmdPool = tu_cmd_pool_to_handle(pool); - - return VK_SUCCESS; -} - -void -tu_DestroyCommandPool(VkDevice _device, - VkCommandPool commandPool, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - 
TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool); - - if (!pool) - return; - - list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer, - &pool->cmd_buffers, pool_link) - { - tu_cmd_buffer_destroy(cmd_buffer); - } - - list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer, - &pool->free_cmd_buffers, pool_link) - { - tu_cmd_buffer_destroy(cmd_buffer); - } - - vk_free2(&device->alloc, pAllocator, pool); -} - -VkResult -tu_ResetCommandPool(VkDevice device, - VkCommandPool commandPool, - VkCommandPoolResetFlags flags) -{ - TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool); - VkResult result; - - list_for_each_entry(struct tu_cmd_buffer, cmd_buffer, &pool->cmd_buffers, - pool_link) - { - result = tu_reset_cmd_buffer(cmd_buffer); - if (result != VK_SUCCESS) - return result; - } - - return VK_SUCCESS; -} - -void -tu_TrimCommandPool(VkDevice device, - VkCommandPool commandPool, - VkCommandPoolTrimFlags flags) -{ - TU_FROM_HANDLE(tu_cmd_pool, pool, commandPool); - - if (!pool) - return; - - list_for_each_entry_safe(struct tu_cmd_buffer, cmd_buffer, - &pool->free_cmd_buffers, pool_link) - { - tu_cmd_buffer_destroy(cmd_buffer); - } -} - -void -tu_CmdBeginRenderPass(VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo *pRenderPassBegin, - VkSubpassContents contents) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - TU_FROM_HANDLE(tu_render_pass, pass, pRenderPassBegin->renderPass); - TU_FROM_HANDLE(tu_framebuffer, framebuffer, pRenderPassBegin->framebuffer); - VkResult result; - - cmd_buffer->state.pass = pass; - cmd_buffer->state.subpass = pass->subpasses; - cmd_buffer->state.framebuffer = framebuffer; - - result = tu_cmd_state_setup_attachments(cmd_buffer, pRenderPassBegin); - if (result != VK_SUCCESS) - return; - - tu_cmd_update_tiling_config(cmd_buffer, &pRenderPassBegin->renderArea); - tu_cmd_prepare_tile_load_ib(cmd_buffer); - tu_cmd_prepare_tile_store_ib(cmd_buffer); - - /* draw_cs should contain entries only for this render pass */ - assert(!cmd_buffer->draw_cs.entry_count); - tu_cs_begin(&cmd_buffer->draw_cs); -} - -void -tu_CmdBeginRenderPass2KHR(VkCommandBuffer commandBuffer, - const VkRenderPassBeginInfo *pRenderPassBeginInfo, - const VkSubpassBeginInfoKHR *pSubpassBeginInfo) -{ - tu_CmdBeginRenderPass(commandBuffer, pRenderPassBeginInfo, - pSubpassBeginInfo->contents); -} - -void -tu_CmdNextSubpass(VkCommandBuffer commandBuffer, VkSubpassContents contents) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); - - tu_cmd_render_tiles(cmd); - - cmd->state.subpass++; - - tu_cmd_update_tiling_config(cmd, NULL); - tu_cmd_prepare_tile_load_ib(cmd); - tu_cmd_prepare_tile_store_ib(cmd); -} - -void -tu_CmdNextSubpass2KHR(VkCommandBuffer commandBuffer, - const VkSubpassBeginInfoKHR *pSubpassBeginInfo, - const VkSubpassEndInfoKHR *pSubpassEndInfo) -{ - tu_CmdNextSubpass(commandBuffer, pSubpassBeginInfo->contents); -} - -struct tu_draw_info -{ - /** - * Number of vertices. - */ - uint32_t count; - - /** - * Index of the first vertex. - */ - int32_t vertex_offset; - - /** - * First instance id. - */ - uint32_t first_instance; - - /** - * Number of instances. - */ - uint32_t instance_count; - - /** - * First index (indexed draws only). - */ - uint32_t first_index; - - /** - * Whether it's an indexed draw. - */ - bool indexed; - - /** - * Indirect draw parameters resource. - */ - struct tu_buffer *indirect; - uint64_t indirect_offset; - uint32_t stride; - - /** - * Draw count parameters resource. 
- */ - struct tu_buffer *count_buffer; - uint64_t count_buffer_offset; -}; - -enum tu_draw_state_group_id -{ - TU_DRAW_STATE_PROGRAM, - TU_DRAW_STATE_PROGRAM_BINNING, - TU_DRAW_STATE_VI, - TU_DRAW_STATE_VI_BINNING, - TU_DRAW_STATE_VP, - TU_DRAW_STATE_RAST, - TU_DRAW_STATE_DS, - TU_DRAW_STATE_BLEND, - - TU_DRAW_STATE_COUNT, -}; - -struct tu_draw_state_group -{ - enum tu_draw_state_group_id id; - uint32_t enable_mask; - const struct tu_cs_entry *ib; -}; - -static void -tu6_bind_draw_states(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_draw_info *draw) -{ - const struct tu_pipeline *pipeline = cmd->state.pipeline; - const struct tu_dynamic_state *dynamic = &cmd->state.dynamic; - struct tu_draw_state_group draw_state_groups[TU_DRAW_STATE_COUNT]; - uint32_t draw_state_group_count = 0; - - VkResult result = tu_cs_reserve_space(cmd->device, cs, 256); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - /* TODO lrz */ - - uint32_t pc_primitive_cntl = 0; - if (pipeline->ia.primitive_restart && draw->indexed) - pc_primitive_cntl |= A6XX_PC_PRIMITIVE_CNTL_0_PRIMITIVE_RESTART; - - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9806, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_PC_UNKNOWN_9990, 0); - tu_cs_emit_write_reg(cs, REG_A6XX_VFD_UNKNOWN_A008, 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_0, 1); - tu_cs_emit(cs, pc_primitive_cntl); - - if (cmd->state.dirty & - (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_DYNAMIC_LINE_WIDTH) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) { - tu6_emit_gras_su_cntl(cs, pipeline->rast.gras_su_cntl, - dynamic->line_width); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { - tu6_emit_stencil_compare_mask(cs, dynamic->stencil_compare_mask.front, - dynamic->stencil_compare_mask.back); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { - tu6_emit_stencil_write_mask(cs, dynamic->stencil_write_mask.front, - dynamic->stencil_write_mask.back); - } - - if ((cmd->state.dirty & TU_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) && - (pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { - tu6_emit_stencil_reference(cs, dynamic->stencil_reference.front, - dynamic->stencil_reference.back); - } - - if (cmd->state.dirty & - (TU_CMD_DIRTY_PIPELINE | TU_CMD_DIRTY_VERTEX_BUFFERS)) { - for (uint32_t i = 0; i < pipeline->vi.count; i++) { - const uint32_t binding = pipeline->vi.bindings[i]; - const uint32_t stride = pipeline->vi.strides[i]; - const struct tu_buffer *buf = cmd->state.vb.buffers[binding]; - const VkDeviceSize offset = buf->bo_offset + - cmd->state.vb.offsets[binding] + - pipeline->vi.offsets[i]; - const VkDeviceSize size = - offset < buf->bo->size ? 
buf->bo->size - offset : 0; - - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_FETCH(i), 4); - tu_cs_emit_qw(cs, buf->bo->iova + offset); - tu_cs_emit(cs, size); - tu_cs_emit(cs, stride); - } - } - - /* TODO shader consts */ - - if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) { - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_PROGRAM, - .enable_mask = 0x6, - .ib = &pipeline->program.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_PROGRAM_BINNING, - .enable_mask = 0x1, - .ib = &pipeline->program.binning_state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VI, - .enable_mask = 0x6, - .ib = &pipeline->vi.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VI_BINNING, - .enable_mask = 0x1, - .ib = &pipeline->vi.binning_state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_VP, - .enable_mask = 0x7, - .ib = &pipeline->vp.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_RAST, - .enable_mask = 0x7, - .ib = &pipeline->rast.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_DS, - .enable_mask = 0x7, - .ib = &pipeline->ds.state_ib, - }; - draw_state_groups[draw_state_group_count++] = - (struct tu_draw_state_group) { - .id = TU_DRAW_STATE_BLEND, - .enable_mask = 0x7, - .ib = &pipeline->blend.state_ib, - }; - } - - tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * draw_state_group_count); - for (uint32_t i = 0; i < draw_state_group_count; i++) { - const struct tu_draw_state_group *group = &draw_state_groups[i]; - - uint32_t cp_set_draw_state = - CP_SET_DRAW_STATE__0_COUNT(group->ib->size / 4) | - CP_SET_DRAW_STATE__0_ENABLE_MASK(group->enable_mask) | - CP_SET_DRAW_STATE__0_GROUP_ID(group->id); - uint64_t iova; - if (group->ib->size) { - iova = group->ib->bo->iova + group->ib->offset; - } else { - cp_set_draw_state |= CP_SET_DRAW_STATE__0_DISABLE; - iova = 0; - } - - tu_cs_emit(cs, cp_set_draw_state); - tu_cs_emit_qw(cs, iova); - } - - tu_cs_sanity_check(cs); - - /* track BOs */ - if (cmd->state.dirty & TU_CMD_DIRTY_PIPELINE) { - tu_bo_list_add(&cmd->bo_list, &pipeline->program.binary_bo, - MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); - for (uint32_t i = 0; i < pipeline->cs.bo_count; i++) { - tu_bo_list_add(&cmd->bo_list, pipeline->cs.bos[i], - MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); - } - } - if (cmd->state.dirty & TU_CMD_DIRTY_VERTEX_BUFFERS) { - for (uint32_t i = 0; i < MAX_VBS; i++) { - const struct tu_buffer *buf = cmd->state.vb.buffers[i]; - if (buf) - tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ); - } - } - - cmd->state.dirty = 0; -} - -static void -tu6_emit_draw_direct(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_draw_info *draw) -{ - - const enum pc_di_primtype primtype = cmd->state.pipeline->ia.primtype; - - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_INDEX_OFFSET, 2); - tu_cs_emit(cs, draw->vertex_offset); - tu_cs_emit(cs, draw->first_instance); - - /* TODO hw binning */ - if (draw->indexed) { - const enum a4xx_index_size index_size = - tu6_index_size(cmd->state.index_type); - const uint32_t index_bytes = - (cmd->state.index_type == VK_INDEX_TYPE_UINT32) ? 
4 : 2; - const struct tu_buffer *buf = cmd->state.index_buffer; - const VkDeviceSize offset = buf->bo_offset + cmd->state.index_offset + - index_bytes * draw->first_index; - const uint32_t size = index_bytes * draw->count; - - const uint32_t cp_draw_indx = - CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | - CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_DMA) | - CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(index_size) | - CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000; - - tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 7); - tu_cs_emit(cs, cp_draw_indx); - tu_cs_emit(cs, draw->instance_count); - tu_cs_emit(cs, draw->count); - tu_cs_emit(cs, 0x0); /* XXX */ - tu_cs_emit_qw(cs, buf->bo->iova + offset); - tu_cs_emit(cs, size); - } else { - const uint32_t cp_draw_indx = - CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(primtype) | - CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | - CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY) | 0x2000; - - tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); - tu_cs_emit(cs, cp_draw_indx); - tu_cs_emit(cs, draw->instance_count); - tu_cs_emit(cs, draw->count); - } -} - -static void -tu_draw(struct tu_cmd_buffer *cmd, const struct tu_draw_info *draw) -{ - struct tu_cs *cs = &cmd->draw_cs; - - tu6_bind_draw_states(cmd, cs, draw); - - VkResult result = tu_cs_reserve_space(cmd->device, cs, 32); - if (result != VK_SUCCESS) { - cmd->record_result = result; - return; - } - - if (draw->indirect) { - tu_finishme("indirect draw"); - return; - } - - /* TODO tu6_emit_marker should pick different regs depending on cs */ - tu6_emit_marker(cmd, cs); - tu6_emit_draw_direct(cmd, cs, draw); - tu6_emit_marker(cmd, cs); - - cmd->wait_for_idle = true; - - tu_cs_sanity_check(cs); -} - -void -tu_CmdDraw(VkCommandBuffer commandBuffer, - uint32_t vertexCount, - uint32_t instanceCount, - uint32_t firstVertex, - uint32_t firstInstance) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - struct tu_draw_info info = {}; - - info.count = vertexCount; - info.instance_count = instanceCount; - info.first_instance = firstInstance; - info.vertex_offset = firstVertex; - - tu_draw(cmd_buffer, &info); -} - -void -tu_CmdDrawIndexed(VkCommandBuffer commandBuffer, - uint32_t indexCount, - uint32_t instanceCount, - uint32_t firstIndex, - int32_t vertexOffset, - uint32_t firstInstance) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - struct tu_draw_info info = {}; - - info.indexed = true; - info.count = indexCount; - info.instance_count = instanceCount; - info.first_index = firstIndex; - info.vertex_offset = vertexOffset; - info.first_instance = firstInstance; - - tu_draw(cmd_buffer, &info); -} - -void -tu_CmdDrawIndirect(VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - TU_FROM_HANDLE(tu_buffer, buffer, _buffer); - struct tu_draw_info info = {}; - - info.count = drawCount; - info.indirect = buffer; - info.indirect_offset = offset; - info.stride = stride; - - tu_draw(cmd_buffer, &info); -} - -void -tu_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset, - uint32_t drawCount, - uint32_t stride) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - TU_FROM_HANDLE(tu_buffer, buffer, _buffer); - struct tu_draw_info info = {}; - - info.indexed = true; - info.count = drawCount; - info.indirect = buffer; - info.indirect_offset = offset; - info.stride = stride; - - tu_draw(cmd_buffer, &info); -} - -struct 
tu_dispatch_info -{ - /** - * Determine the layout of the grid (in block units) to be used. - */ - uint32_t blocks[3]; - - /** - * A starting offset for the grid. If unaligned is set, the offset - * must still be aligned. - */ - uint32_t offsets[3]; - /** - * Whether it's an unaligned compute dispatch. - */ - bool unaligned; - - /** - * Indirect compute parameters resource. - */ - struct tu_buffer *indirect; - uint64_t indirect_offset; -}; - -static void -tu_dispatch(struct tu_cmd_buffer *cmd_buffer, - const struct tu_dispatch_info *info) -{ -} - -void -tu_CmdDispatchBase(VkCommandBuffer commandBuffer, - uint32_t base_x, - uint32_t base_y, - uint32_t base_z, - uint32_t x, - uint32_t y, - uint32_t z) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - struct tu_dispatch_info info = {}; - - info.blocks[0] = x; - info.blocks[1] = y; - info.blocks[2] = z; - - info.offsets[0] = base_x; - info.offsets[1] = base_y; - info.offsets[2] = base_z; - tu_dispatch(cmd_buffer, &info); -} - -void -tu_CmdDispatch(VkCommandBuffer commandBuffer, - uint32_t x, - uint32_t y, - uint32_t z) -{ - tu_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z); -} - -void -tu_CmdDispatchIndirect(VkCommandBuffer commandBuffer, - VkBuffer _buffer, - VkDeviceSize offset) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - TU_FROM_HANDLE(tu_buffer, buffer, _buffer); - struct tu_dispatch_info info = {}; - - info.indirect = buffer; - info.indirect_offset = offset; - - tu_dispatch(cmd_buffer, &info); -} - -void -tu_CmdEndRenderPass(VkCommandBuffer commandBuffer) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - - tu_cs_end(&cmd_buffer->draw_cs); - - tu_cmd_render_tiles(cmd_buffer); - - /* discard draw_cs entries now that the tiles are rendered */ - tu_cs_discard_entries(&cmd_buffer->draw_cs); - - vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments); - cmd_buffer->state.attachments = NULL; - - cmd_buffer->state.pass = NULL; - cmd_buffer->state.subpass = NULL; - cmd_buffer->state.framebuffer = NULL; -} - -void -tu_CmdEndRenderPass2KHR(VkCommandBuffer commandBuffer, - const VkSubpassEndInfoKHR *pSubpassEndInfo) -{ - tu_CmdEndRenderPass(commandBuffer); -} - -struct tu_barrier_info -{ - uint32_t eventCount; - const VkEvent *pEvents; - VkPipelineStageFlags srcStageMask; -}; - -static void -tu_barrier(struct tu_cmd_buffer *cmd_buffer, - uint32_t memoryBarrierCount, - const VkMemoryBarrier *pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier *pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier *pImageMemoryBarriers, - const struct tu_barrier_info *info) -{ -} - -void -tu_CmdPipelineBarrier(VkCommandBuffer commandBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags destStageMask, - VkBool32 byRegion, - uint32_t memoryBarrierCount, - const VkMemoryBarrier *pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier *pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier *pImageMemoryBarriers) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - struct tu_barrier_info info; - - info.eventCount = 0; - info.pEvents = NULL; - info.srcStageMask = srcStageMask; - - tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers, &info); -} - -static void -write_event(struct tu_cmd_buffer *cmd_buffer, - struct tu_event *event, - VkPipelineStageFlags 
stageMask, - unsigned value) -{ -} - -void -tu_CmdSetEvent(VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - TU_FROM_HANDLE(tu_event, event, _event); - - write_event(cmd_buffer, event, stageMask, 1); -} - -void -tu_CmdResetEvent(VkCommandBuffer commandBuffer, - VkEvent _event, - VkPipelineStageFlags stageMask) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - TU_FROM_HANDLE(tu_event, event, _event); - - write_event(cmd_buffer, event, stageMask, 0); -} - -void -tu_CmdWaitEvents(VkCommandBuffer commandBuffer, - uint32_t eventCount, - const VkEvent *pEvents, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - uint32_t memoryBarrierCount, - const VkMemoryBarrier *pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier *pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier *pImageMemoryBarriers) -{ - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer); - struct tu_barrier_info info; - - info.eventCount = eventCount; - info.pEvents = pEvents; - info.srcStageMask = 0; - - tu_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, - bufferMemoryBarrierCount, pBufferMemoryBarriers, - imageMemoryBarrierCount, pImageMemoryBarriers, &info); -} - -void -tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) -{ - /* No-op */ -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_cs.c b/lib/mesa/src/freedreno/vulkan/tu_cs.c deleted file mode 100644 index 48242f813..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_cs.c +++ /dev/null @@ -1,368 +0,0 @@ -/* - * Copyright © 2019 Google LLC - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_cs.h" - -/** - * Initialize a command stream. - */ -void -tu_cs_init(struct tu_cs *cs, enum tu_cs_mode mode, uint32_t initial_size) -{ - assert(mode != TU_CS_MODE_EXTERNAL); - - memset(cs, 0, sizeof(*cs)); - - cs->mode = mode; - cs->next_bo_size = initial_size; -} - -/** - * Initialize a command stream as a wrapper to an external buffer. - */ -void -tu_cs_init_external(struct tu_cs *cs, uint32_t *start, uint32_t *end) -{ - memset(cs, 0, sizeof(*cs)); - - cs->mode = TU_CS_MODE_EXTERNAL; - cs->start = cs->reserved_end = cs->cur = start; - cs->end = end; -} - -/** - * Finish and release all resources owned by a command stream. 
- */ -void -tu_cs_finish(struct tu_device *dev, struct tu_cs *cs) -{ - for (uint32_t i = 0; i < cs->bo_count; ++i) { - tu_bo_finish(dev, cs->bos[i]); - free(cs->bos[i]); - } - - free(cs->entries); - free(cs->bos); -} - -/** - * Get the offset of the command packets emitted since the last call to - * tu_cs_add_entry. - */ -static uint32_t -tu_cs_get_offset(const struct tu_cs *cs) -{ - assert(cs->bo_count); - return cs->start - (uint32_t *) cs->bos[cs->bo_count - 1]->map; -} - -/** - * Get the size of the command packets emitted since the last call to - * tu_cs_add_entry. - */ -static uint32_t -tu_cs_get_size(const struct tu_cs *cs) -{ - return cs->cur - cs->start; -} - -/** - * Get the size of the remaining space in the current BO. - */ -static uint32_t -tu_cs_get_space(const struct tu_cs *cs) -{ - return cs->end - cs->cur; -} - -/** - * Return true if there is no command packet emitted since the last call to - * tu_cs_add_entry. - */ -static uint32_t -tu_cs_is_empty(const struct tu_cs *cs) -{ - return tu_cs_get_size(cs) == 0; -} - -/* - * Allocate and add a BO to a command stream. Following command packets will - * be emitted to the new BO. - */ -static VkResult -tu_cs_add_bo(struct tu_device *dev, struct tu_cs *cs, uint32_t size) -{ - /* no BO for TU_CS_MODE_EXTERNAL */ - assert(cs->mode != TU_CS_MODE_EXTERNAL); - - /* no dangling command packet */ - assert(tu_cs_is_empty(cs)); - - /* grow cs->bos if needed */ - if (cs->bo_count == cs->bo_capacity) { - uint32_t new_capacity = MAX2(4, 2 * cs->bo_capacity); - struct tu_bo **new_bos = - realloc(cs->bos, new_capacity * sizeof(struct tu_bo *)); - if (!new_bos) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - cs->bo_capacity = new_capacity; - cs->bos = new_bos; - } - - struct tu_bo *new_bo = malloc(sizeof(struct tu_bo)); - if (!new_bo) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - VkResult result = tu_bo_init_new(dev, new_bo, size * sizeof(uint32_t)); - if (result != VK_SUCCESS) { - free(new_bo); - return result; - } - - result = tu_bo_map(dev, new_bo); - if (result != VK_SUCCESS) { - tu_bo_finish(dev, new_bo); - free(new_bo); - return result; - } - - cs->bos[cs->bo_count++] = new_bo; - - cs->start = cs->cur = cs->reserved_end = (uint32_t *) new_bo->map; - cs->end = cs->start + new_bo->size / sizeof(uint32_t); - - return VK_SUCCESS; -} - -/** - * Reserve an IB entry. - */ -static VkResult -tu_cs_reserve_entry(struct tu_device *dev, struct tu_cs *cs) -{ - /* entries are only for TU_CS_MODE_GROW */ - assert(cs->mode == TU_CS_MODE_GROW); - - /* grow cs->entries if needed */ - if (cs->entry_count == cs->entry_capacity) { - uint32_t new_capacity = MAX2(4, cs->entry_capacity * 2); - struct tu_cs_entry *new_entries = - realloc(cs->entries, new_capacity * sizeof(struct tu_cs_entry)); - if (!new_entries) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - cs->entry_capacity = new_capacity; - cs->entries = new_entries; - } - - return VK_SUCCESS; -} - -/** - * Add an IB entry for the command packets emitted since the last call to this - * function. 
- */ -static void -tu_cs_add_entry(struct tu_cs *cs) -{ - /* entries are only for TU_CS_MODE_GROW */ - assert(cs->mode == TU_CS_MODE_GROW); - - /* disallow empty entry */ - assert(!tu_cs_is_empty(cs)); - - /* - * because we disallow empty entry, tu_cs_add_bo and tu_cs_reserve_entry - * must both have been called - */ - assert(cs->bo_count); - assert(cs->entry_count < cs->entry_capacity); - - /* add an entry for [cs->start, cs->cur] */ - cs->entries[cs->entry_count++] = (struct tu_cs_entry) { - .bo = cs->bos[cs->bo_count - 1], - .size = tu_cs_get_size(cs) * sizeof(uint32_t), - .offset = tu_cs_get_offset(cs) * sizeof(uint32_t), - }; - - cs->start = cs->cur; -} - -/** - * Begin (or continue) command packet emission. This does nothing but sanity - * checks currently. \a cs must not be in TU_CS_MODE_SUB_STREAM mode. - */ -void -tu_cs_begin(struct tu_cs *cs) -{ - assert(cs->mode != TU_CS_MODE_SUB_STREAM); - assert(tu_cs_is_empty(cs)); -} - -/** - * End command packet emission. This adds an IB entry when \a cs is in - * TU_CS_MODE_GROW mode. - */ -void -tu_cs_end(struct tu_cs *cs) -{ - assert(cs->mode != TU_CS_MODE_SUB_STREAM); - - if (cs->mode == TU_CS_MODE_GROW && !tu_cs_is_empty(cs)) - tu_cs_add_entry(cs); -} - -/** - * Begin command packet emission to a sub-stream. \a cs must be in - * TU_CS_MODE_SUB_STREAM mode. - * - * Return \a sub_cs which is in TU_CS_MODE_EXTERNAL mode. tu_cs_begin and - * tu_cs_reserve_space are implied and \a sub_cs is ready for command packet - * emission. - */ -VkResult -tu_cs_begin_sub_stream(struct tu_device *dev, - struct tu_cs *cs, - uint32_t size, - struct tu_cs *sub_cs) -{ - assert(cs->mode == TU_CS_MODE_SUB_STREAM); - assert(size); - - VkResult result = tu_cs_reserve_space(dev, cs, size); - if (result != VK_SUCCESS) - return result; - - tu_cs_init_external(sub_cs, cs->cur, cs->reserved_end); - tu_cs_begin(sub_cs); - result = tu_cs_reserve_space(dev, sub_cs, size); - assert(result == VK_SUCCESS); - - return VK_SUCCESS; -} - -/** - * End command packet emission to a sub-stream. \a sub_cs becomes invalid - * after this call. - * - * Return an IB entry for the sub-stream. The entry has the same lifetime as - * \a cs. - */ -struct tu_cs_entry -tu_cs_end_sub_stream(struct tu_cs *cs, struct tu_cs *sub_cs) -{ - assert(cs->mode == TU_CS_MODE_SUB_STREAM); - assert(cs->bo_count); - assert(sub_cs->start == cs->cur && sub_cs->end == cs->reserved_end); - tu_cs_sanity_check(sub_cs); - - tu_cs_end(sub_cs); - - cs->cur = sub_cs->cur; - - struct tu_cs_entry entry = { - .bo = cs->bos[cs->bo_count - 1], - .size = tu_cs_get_size(cs) * sizeof(uint32_t), - .offset = tu_cs_get_offset(cs) * sizeof(uint32_t), - }; - - cs->start = cs->cur; - - return entry; -} - -/** - * Reserve space from a command stream for \a reserved_size uint32_t values. - * This never fails when \a cs has mode TU_CS_MODE_EXTERNAL. 
- */ -VkResult -tu_cs_reserve_space(struct tu_device *dev, - struct tu_cs *cs, - uint32_t reserved_size) -{ - if (tu_cs_get_space(cs) < reserved_size) { - if (cs->mode == TU_CS_MODE_EXTERNAL) { - unreachable("cannot grow external buffer"); - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - /* add an entry for the exiting command packets */ - if (!tu_cs_is_empty(cs)) { - /* no direct command packet for TU_CS_MODE_SUB_STREAM */ - assert(cs->mode != TU_CS_MODE_SUB_STREAM); - - tu_cs_add_entry(cs); - } - - /* switch to a new BO */ - uint32_t new_size = MAX2(cs->next_bo_size, reserved_size); - VkResult result = tu_cs_add_bo(dev, cs, new_size); - if (result != VK_SUCCESS) - return result; - - /* double the size for the next bo */ - new_size <<= 1; - if (cs->next_bo_size < new_size) - cs->next_bo_size = new_size; - } - - assert(tu_cs_get_space(cs) >= reserved_size); - cs->reserved_end = cs->cur + reserved_size; - - if (cs->mode == TU_CS_MODE_GROW) { - /* reserve an entry for the next call to this function or tu_cs_end */ - return tu_cs_reserve_entry(dev, cs); - } - - return VK_SUCCESS; -} - -/** - * Reset a command stream to its initial state. This discards all comand - * packets in \a cs, but does not necessarily release all resources. - */ -void -tu_cs_reset(struct tu_device *dev, struct tu_cs *cs) -{ - if (cs->mode == TU_CS_MODE_EXTERNAL) { - assert(!cs->bo_count && !cs->entry_count); - cs->reserved_end = cs->cur = cs->start; - return; - } - - for (uint32_t i = 0; i + 1 < cs->bo_count; ++i) { - tu_bo_finish(dev, cs->bos[i]); - free(cs->bos[i]); - } - - if (cs->bo_count) { - cs->bos[0] = cs->bos[cs->bo_count - 1]; - cs->bo_count = 1; - - cs->start = cs->cur = cs->reserved_end = (uint32_t *) cs->bos[0]->map; - cs->end = cs->start + cs->bos[0]->size / sizeof(uint32_t); - } - - cs->entry_count = 0; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_cs_breadcrumbs.c b/lib/mesa/src/freedreno/vulkan/tu_cs_breadcrumbs.c deleted file mode 100644 index 0b79c036a..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_cs_breadcrumbs.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright © 2022 Igalia S.L. - * SPDX-License-Identifier: MIT - */ - -#include "tu_cs.h" - -#include <arpa/inet.h> -#include <netinet/in.h> -#include <sys/socket.h> - -#include "tu_device.h" - -/* A simple implementations of breadcrumbs tracking of GPU progress - * intended to be a last resort when debugging unrecoverable hangs. - * For best results use Vulkan traces to have a predictable place of hang. - * - * For ordinary hangs as a more user-friendly solution use GFR - * "Graphics Flight Recorder". - * - * This implementation aims to handle cases where we cannot do anything - * after the hang, which is achieved by: - * - On GPU after each breadcrumb we wait until CPU acks it and sends udp - * packet to the remote host; - * - At specified breadcrumb require explicit user input to continue - * execution up to the next breadcrumb. - * - * In-driver breadcrumbs also allow more precise tracking since we could - * target a single GPU packet. - * - * - * Breadcrumbs settings: - * - * TU_BREADCRUMBS=$IP:$PORT,break=$BREAKPOINT:$BREAKPOINT_HITS - * Where: - * $BREAKPOINT - the breadcrumb from which we require explicit ack - * $BREAKPOINT_HITS - how many times breakpoint should be reached for - * break to occur. Necessary for a gmem mode and re-usable cmdbuffers - * in both of which the same cmdstream could be executed several times. 
- * - * - * A typical work flow would be: - * - Start listening for breadcrumbs on remote host: - * nc -lvup $PORT | stdbuf -o0 xxd -pc -c 4 | awk -Wposix '{printf("%u:%u\n", "0x" $0, a[$0]++)}' - * - * - Start capturing command stream: - * sudo cat /sys/kernel/debug/dri/0/rd > ~/cmdstream.rd - * - * - On device replay the hanging trace with: - * TU_BREADCRUMBS=$IP:$PORT,break=-1:0 - * ! Try to reproduce the hang in a sysmem mode because it would - * require much less breadcrumb writes and syncs. - * - * - Increase hangcheck period: - * echo -n 60000 > /sys/kernel/debug/dri/0/hangcheck_period_ms - * - * - After GPU hang note the last breadcrumb and relaunch trace with: - * TU_BREADCRUMBS=$IP:$PORT,break=$LAST_BREADCRUMB:$HITS - * - * - After the breakpoint is reached each breadcrumb would require - * explicit ack from the user. This way it's possible to find - * the last packet which did't hang. - * - * - Find the packet in the decoded cmdstream. - */ - -struct breadcrumbs_context -{ - char remote_host[64]; - int remote_port; - uint32_t breadcrumb_breakpoint; - uint32_t breadcrumb_breakpoint_hits; - - bool thread_stop; - pthread_t breadcrumbs_thread; - - struct tu_device *device; - - uint32_t breadcrumb_idx; -}; - -static void * -sync_gpu_with_cpu(void *_job) -{ - struct breadcrumbs_context *ctx = (struct breadcrumbs_context *) _job; - struct tu6_global *global = - (struct tu6_global *) ctx->device->global_bo->map; - uint32_t last_breadcrumb = 0; - uint32_t breakpoint_hits = 0; - - int s = socket(AF_INET, SOCK_DGRAM, 0); - - if (s < 0) { - mesa_loge("TU_BREADCRUMBS: Error while creating socket"); - return NULL; - } - - struct sockaddr_in to_addr; - to_addr.sin_family = AF_INET; - to_addr.sin_port = htons(ctx->remote_port); - to_addr.sin_addr.s_addr = inet_addr(ctx->remote_host); - - /* Run until we know that no more work would be submitted, - * because each breadcrumb requires an ack from cpu side and without - * the ack GPU would timeout. 
- */ - while (!ctx->thread_stop) { - uint32_t current_breadcrumb = global->breadcrumb_gpu_sync_seqno; - - if (current_breadcrumb != last_breadcrumb) { - last_breadcrumb = current_breadcrumb; - - uint32_t data = htonl(last_breadcrumb); - if (sendto(s, &data, sizeof(data), 0, (struct sockaddr *) &to_addr, - sizeof(to_addr)) < 0) { - mesa_loge("TU_BREADCRUMBS: sendto failed"); - goto fail; - } - - if (last_breadcrumb >= ctx->breadcrumb_breakpoint && - breakpoint_hits >= ctx->breadcrumb_breakpoint_hits) { - printf("GPU is on breadcrumb %d, continue?", last_breadcrumb); - while (getchar() != 'y') - ; - } - - if (ctx->breadcrumb_breakpoint == last_breadcrumb) - breakpoint_hits++; - - /* ack that we received the value */ - global->breadcrumb_cpu_sync_seqno = last_breadcrumb; - } - } - -fail: - close(s); - - return NULL; -} - -/* Same as tu_cs_emit_pkt7 but without instrumentation */ -static inline void -emit_pkt7(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) -{ - tu_cs_reserve(cs, cnt + 1); - tu_cs_emit(cs, pm4_pkt7_hdr(opcode, cnt)); -} - -void -tu_breadcrumbs_init(struct tu_device *device) -{ - const char *breadcrumbs_opt = NULL; -#ifdef TU_BREADCRUMBS_ENABLED - breadcrumbs_opt = os_get_option("TU_BREADCRUMBS"); -#endif - - device->breadcrumbs_ctx = NULL; - if (!breadcrumbs_opt) { - return; - } - - struct breadcrumbs_context *ctx = - malloc(sizeof(struct breadcrumbs_context)); - ctx->device = device; - ctx->breadcrumb_idx = 0; - ctx->thread_stop = false; - - if (sscanf(breadcrumbs_opt, "%[^:]:%d,break=%u:%u", ctx->remote_host, - &ctx->remote_port, &ctx->breadcrumb_breakpoint, - &ctx->breadcrumb_breakpoint_hits) != 4) { - free(ctx); - mesa_loge("Wrong TU_BREADCRUMBS value"); - return; - } - - device->breadcrumbs_ctx = ctx; - - struct tu6_global *global = device->global_bo->map; - global->breadcrumb_cpu_sync_seqno = 0; - global->breadcrumb_gpu_sync_seqno = 0; - - pthread_create(&ctx->breadcrumbs_thread, NULL, sync_gpu_with_cpu, ctx); -} - -void -tu_breadcrumbs_finish(struct tu_device *device) -{ - struct breadcrumbs_context *ctx = device->breadcrumbs_ctx; - if (!ctx || ctx->thread_stop) - return; - - ctx->thread_stop = true; - pthread_join(ctx->breadcrumbs_thread, NULL); - - free(ctx); -} - -void -tu_cs_emit_sync_breadcrumb(struct tu_cs *cs, uint8_t opcode, uint16_t cnt) -{ - /* TODO: we may run out of space if we add breadcrumbs - * to non-growable CS. 
- */ - if (cs->mode != TU_CS_MODE_GROW) - return; - - struct tu_device *device = cs->device; - struct breadcrumbs_context *ctx = device->breadcrumbs_ctx; - if (!ctx || ctx->thread_stop) - return; - - bool before_packet = (cnt != 0); - - if (before_packet) { - switch (opcode) { - case CP_EXEC_CS_INDIRECT: - case CP_EXEC_CS: - case CP_DRAW_INDX: - case CP_DRAW_INDX_OFFSET: - case CP_DRAW_INDIRECT: - case CP_DRAW_INDX_INDIRECT: - case CP_DRAW_INDIRECT_MULTI: - case CP_DRAW_AUTO: - case CP_BLIT: - // case CP_SET_DRAW_STATE: - // case CP_LOAD_STATE6_FRAG: - // case CP_LOAD_STATE6_GEOM: - break; - default: - return; - }; - } else { - assert(cs->breadcrumb_emit_after == 0); - } - - uint32_t current_breadcrumb = p_atomic_inc_return(&ctx->breadcrumb_idx); - - if (ctx->breadcrumb_breakpoint != -1 && - current_breadcrumb < ctx->breadcrumb_breakpoint) - return; - - emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0); - emit_pkt7(cs, CP_WAIT_FOR_IDLE, 0); - emit_pkt7(cs, CP_WAIT_FOR_ME, 0); - - emit_pkt7(cs, CP_MEM_WRITE, 3); - tu_cs_emit_qw( - cs, device->global_bo->iova + gb_offset(breadcrumb_gpu_sync_seqno)); - tu_cs_emit(cs, current_breadcrumb); - - /* Wait until CPU acknowledges the value written by GPU */ - emit_pkt7(cs, CP_WAIT_REG_MEM, 6); - tu_cs_emit(cs, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) | - CP_WAIT_REG_MEM_0_POLL_MEMORY); - tu_cs_emit_qw( - cs, device->global_bo->iova + gb_offset(breadcrumb_cpu_sync_seqno)); - tu_cs_emit(cs, CP_WAIT_REG_MEM_3_REF(current_breadcrumb)); - tu_cs_emit(cs, CP_WAIT_REG_MEM_4_MASK(~0)); - tu_cs_emit(cs, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16)); - - if (before_packet) - cs->breadcrumb_emit_after = cnt; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.c b/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.c deleted file mode 100644 index 0f49d26e2..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_descriptor_set.c +++ /dev/null @@ -1,570 +0,0 @@ -/* - * Copyright © 2016 Red Hat. - * Copyright © 2016 Bas Nieuwenhuizen - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#include "tu_private.h" - -#include <assert.h> -#include <fcntl.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> - -#include "util/mesa-sha1.h" -#include "vk_util.h" - -static int -binding_compare(const void *av, const void *bv) -{ - const VkDescriptorSetLayoutBinding *a = - (const VkDescriptorSetLayoutBinding *) av; - const VkDescriptorSetLayoutBinding *b = - (const VkDescriptorSetLayoutBinding *) bv; - - return (a->binding < b->binding) ? -1 : (a->binding > b->binding) ? 1 : 0; -} - -static VkDescriptorSetLayoutBinding * -create_sorted_bindings(const VkDescriptorSetLayoutBinding *bindings, - unsigned count) -{ - VkDescriptorSetLayoutBinding *sorted_bindings = - malloc(count * sizeof(VkDescriptorSetLayoutBinding)); - if (!sorted_bindings) - return NULL; - - memcpy(sorted_bindings, bindings, - count * sizeof(VkDescriptorSetLayoutBinding)); - - qsort(sorted_bindings, count, sizeof(VkDescriptorSetLayoutBinding), - binding_compare); - - return sorted_bindings; -} - -VkResult -tu_CreateDescriptorSetLayout( - VkDevice _device, - const VkDescriptorSetLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorSetLayout *pSetLayout) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_descriptor_set_layout *set_layout; - - assert(pCreateInfo->sType == - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO); - const VkDescriptorSetLayoutBindingFlagsCreateInfoEXT *variable_flags = - vk_find_struct_const( - pCreateInfo->pNext, - DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT); - - uint32_t max_binding = 0; - uint32_t immutable_sampler_count = 0; - for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding); - if (pCreateInfo->pBindings[j].pImmutableSamplers) - immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount; - } - - uint32_t samplers_offset = - sizeof(struct tu_descriptor_set_layout) + - (max_binding + 1) * sizeof(set_layout->binding[0]); - size_t size = - samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t); - - set_layout = vk_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!set_layout) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - set_layout->flags = pCreateInfo->flags; - - /* We just allocate all the samplers at the end of the struct */ - uint32_t *samplers = (uint32_t *) &set_layout->binding[max_binding + 1]; - (void) samplers; /* TODO: Use me */ - - VkDescriptorSetLayoutBinding *bindings = create_sorted_bindings( - pCreateInfo->pBindings, pCreateInfo->bindingCount); - if (!bindings) { - vk_free2(&device->alloc, pAllocator, set_layout); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - } - - set_layout->binding_count = max_binding + 1; - set_layout->shader_stages = 0; - set_layout->dynamic_shader_stages = 0; - set_layout->has_immutable_samplers = false; - set_layout->size = 0; - - memset(set_layout->binding, 0, - size - sizeof(struct tu_descriptor_set_layout)); - - uint32_t buffer_count = 0; - uint32_t dynamic_offset_count = 0; - - for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { - const VkDescriptorSetLayoutBinding *binding = bindings + j; - uint32_t b = binding->binding; - uint32_t alignment; - unsigned binding_buffer_count = 0; - - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - assert(!(pCreateInfo->flags & - 
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); - set_layout->binding[b].dynamic_offset_count = 1; - set_layout->dynamic_shader_stages |= binding->stageFlags; - set_layout->binding[b].size = 0; - binding_buffer_count = 1; - alignment = 1; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - set_layout->binding[b].size = 16; - binding_buffer_count = 1; - alignment = 16; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - /* main descriptor + fmask descriptor */ - set_layout->binding[b].size = 64; - binding_buffer_count = 1; - alignment = 32; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - /* main descriptor + fmask descriptor + sampler */ - set_layout->binding[b].size = 96; - binding_buffer_count = 1; - alignment = 32; - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - set_layout->binding[b].size = 16; - alignment = 16; - break; - default: - unreachable("unknown descriptor type\n"); - break; - } - - set_layout->size = align(set_layout->size, alignment); - set_layout->binding[b].type = binding->descriptorType; - set_layout->binding[b].array_size = binding->descriptorCount; - set_layout->binding[b].offset = set_layout->size; - set_layout->binding[b].buffer_offset = buffer_count; - set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count; - - if (variable_flags && binding->binding < variable_flags->bindingCount && - (variable_flags->pBindingFlags[binding->binding] & - VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) { - assert(!binding->pImmutableSamplers); /* Terribly ill defined how - many samplers are valid */ - assert(binding->binding == max_binding); - - set_layout->has_variable_descriptors = true; - } - - if (binding->pImmutableSamplers) { - set_layout->binding[b].immutable_samplers_offset = samplers_offset; - set_layout->has_immutable_samplers = true; - } - - set_layout->size += - binding->descriptorCount * set_layout->binding[b].size; - buffer_count += binding->descriptorCount * binding_buffer_count; - dynamic_offset_count += binding->descriptorCount * - set_layout->binding[b].dynamic_offset_count; - set_layout->shader_stages |= binding->stageFlags; - } - - free(bindings); - - set_layout->buffer_count = buffer_count; - set_layout->dynamic_offset_count = dynamic_offset_count; - - *pSetLayout = tu_descriptor_set_layout_to_handle(set_layout); - - return VK_SUCCESS; -} - -void -tu_DestroyDescriptorSetLayout(VkDevice _device, - VkDescriptorSetLayout _set_layout, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, _set_layout); - - if (!set_layout) - return; - - vk_free2(&device->alloc, pAllocator, set_layout); -} - -void -tu_GetDescriptorSetLayoutSupport( - VkDevice device, - const VkDescriptorSetLayoutCreateInfo *pCreateInfo, - VkDescriptorSetLayoutSupport *pSupport) -{ - VkDescriptorSetLayoutBinding *bindings = create_sorted_bindings( - pCreateInfo->pBindings, pCreateInfo->bindingCount); - if (!bindings) { - pSupport->supported = false; - return; - } - - const VkDescriptorSetLayoutBindingFlagsCreateInfoEXT *variable_flags = - vk_find_struct_const( - pCreateInfo->pNext, - DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT); - VkDescriptorSetVariableDescriptorCountLayoutSupportEXT *variable_count = - vk_find_struct( - (void *) 
pCreateInfo->pNext, - DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT_EXT); - if (variable_count) { - variable_count->maxVariableDescriptorCount = 0; - } - - bool supported = true; - uint64_t size = 0; - for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { - const VkDescriptorSetLayoutBinding *binding = bindings + i; - - uint64_t descriptor_size = 0; - uint64_t descriptor_alignment = 1; - switch (binding->descriptorType) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - descriptor_size = 16; - descriptor_alignment = 16; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - descriptor_size = 64; - descriptor_alignment = 32; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - descriptor_size = 96; - descriptor_alignment = 32; - break; - case VK_DESCRIPTOR_TYPE_SAMPLER: - descriptor_size = 16; - descriptor_alignment = 16; - break; - default: - unreachable("unknown descriptor type\n"); - break; - } - - if (size && !align_u64(size, descriptor_alignment)) { - supported = false; - } - size = align_u64(size, descriptor_alignment); - - uint64_t max_count = UINT64_MAX; - if (descriptor_size) - max_count = (UINT64_MAX - size) / descriptor_size; - - if (max_count < binding->descriptorCount) { - supported = false; - } - if (variable_flags && binding->binding < variable_flags->bindingCount && - variable_count && - (variable_flags->pBindingFlags[binding->binding] & - VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) { - variable_count->maxVariableDescriptorCount = - MIN2(UINT32_MAX, max_count); - } - size += binding->descriptorCount * descriptor_size; - } - - free(bindings); - - pSupport->supported = supported; -} - -/* - * Pipeline layouts. These have nothing to do with the pipeline. They are - * just multiple descriptor set layouts pasted together. 
- */ - -VkResult -tu_CreatePipelineLayout(VkDevice _device, - const VkPipelineLayoutCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipelineLayout *pPipelineLayout) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_pipeline_layout *layout; - struct mesa_sha1 ctx; - - assert(pCreateInfo->sType == - VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO); - - layout = vk_alloc2(&device->alloc, pAllocator, sizeof(*layout), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (layout == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - layout->num_sets = pCreateInfo->setLayoutCount; - - unsigned dynamic_offset_count = 0; - - _mesa_sha1_init(&ctx); - for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) { - TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, - pCreateInfo->pSetLayouts[set]); - layout->set[set].layout = set_layout; - - layout->set[set].dynamic_offset_start = dynamic_offset_count; - for (uint32_t b = 0; b < set_layout->binding_count; b++) { - dynamic_offset_count += set_layout->binding[b].array_size * - set_layout->binding[b].dynamic_offset_count; - if (set_layout->binding[b].immutable_samplers_offset) - _mesa_sha1_update( - &ctx, - tu_immutable_samplers(set_layout, set_layout->binding + b), - set_layout->binding[b].array_size * 4 * sizeof(uint32_t)); - } - _mesa_sha1_update( - &ctx, set_layout->binding, - sizeof(set_layout->binding[0]) * set_layout->binding_count); - } - - layout->dynamic_offset_count = dynamic_offset_count; - layout->push_constant_size = 0; - - for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) { - const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i; - layout->push_constant_size = - MAX2(layout->push_constant_size, range->offset + range->size); - } - - layout->push_constant_size = align(layout->push_constant_size, 16); - _mesa_sha1_update(&ctx, &layout->push_constant_size, - sizeof(layout->push_constant_size)); - _mesa_sha1_final(&ctx, layout->sha1); - *pPipelineLayout = tu_pipeline_layout_to_handle(layout); - - return VK_SUCCESS; -} - -void -tu_DestroyPipelineLayout(VkDevice _device, - VkPipelineLayout _pipelineLayout, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_pipeline_layout, pipeline_layout, _pipelineLayout); - - if (!pipeline_layout) - return; - vk_free2(&device->alloc, pAllocator, pipeline_layout); -} - -#define EMPTY 1 - -VkResult -tu_CreateDescriptorPool(VkDevice _device, - const VkDescriptorPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorPool *pDescriptorPool) -{ - TU_FROM_HANDLE(tu_device, device, _device); - tu_use_args(device); - tu_stub(); - return VK_SUCCESS; -} - -void -tu_DestroyDescriptorPool(VkDevice _device, - VkDescriptorPool _pool, - const VkAllocationCallbacks *pAllocator) -{ -} - -VkResult -tu_ResetDescriptorPool(VkDevice _device, - VkDescriptorPool descriptorPool, - VkDescriptorPoolResetFlags flags) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_descriptor_pool, pool, descriptorPool); - - tu_use_args(device, pool); - tu_stub(); - return VK_SUCCESS; -} - -VkResult -tu_AllocateDescriptorSets(VkDevice _device, - const VkDescriptorSetAllocateInfo *pAllocateInfo, - VkDescriptorSet *pDescriptorSets) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_descriptor_pool, pool, pAllocateInfo->descriptorPool); - - tu_use_args(device, pool); - tu_stub(); - return VK_SUCCESS; -} - -VkResult 
-tu_FreeDescriptorSets(VkDevice _device, - VkDescriptorPool descriptorPool, - uint32_t count, - const VkDescriptorSet *pDescriptorSets) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_descriptor_pool, pool, descriptorPool); - - tu_use_args(device, pool); - tu_stub(); - return VK_SUCCESS; -} - -void -tu_update_descriptor_sets(struct tu_device *device, - struct tu_cmd_buffer *cmd_buffer, - VkDescriptorSet dstSetOverride, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, - uint32_t descriptorCopyCount, - const VkCopyDescriptorSet *pDescriptorCopies) -{ -} - -void -tu_UpdateDescriptorSets(VkDevice _device, - uint32_t descriptorWriteCount, - const VkWriteDescriptorSet *pDescriptorWrites, - uint32_t descriptorCopyCount, - const VkCopyDescriptorSet *pDescriptorCopies) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - tu_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, - descriptorWriteCount, pDescriptorWrites, - descriptorCopyCount, pDescriptorCopies); -} - -VkResult -tu_CreateDescriptorUpdateTemplate( - VkDevice _device, - const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_descriptor_set_layout, set_layout, - pCreateInfo->descriptorSetLayout); - const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount; - const size_t size = - sizeof(struct tu_descriptor_update_template) + - sizeof(struct tu_descriptor_update_template_entry) * entry_count; - struct tu_descriptor_update_template *templ; - - templ = vk_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!templ) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - *pDescriptorUpdateTemplate = - tu_descriptor_update_template_to_handle(templ); - - tu_use_args(set_layout); - tu_stub(); - return VK_SUCCESS; -} - -void -tu_DestroyDescriptorUpdateTemplate( - VkDevice _device, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_descriptor_update_template, templ, - descriptorUpdateTemplate); - - if (!templ) - return; - - vk_free2(&device->alloc, pAllocator, templ); -} - -void -tu_update_descriptor_set_with_template( - struct tu_device *device, - struct tu_cmd_buffer *cmd_buffer, - struct tu_descriptor_set *set, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - const void *pData) -{ - TU_FROM_HANDLE(tu_descriptor_update_template, templ, - descriptorUpdateTemplate); - tu_use_args(templ); -} - -void -tu_UpdateDescriptorSetWithTemplate( - VkDevice _device, - VkDescriptorSet descriptorSet, - VkDescriptorUpdateTemplate descriptorUpdateTemplate, - const void *pData) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_descriptor_set, set, descriptorSet); - - tu_update_descriptor_set_with_template(device, NULL, set, - descriptorUpdateTemplate, pData); -} - -VkResult -tu_CreateSamplerYcbcrConversion( - VkDevice device, - const VkSamplerYcbcrConversionCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSamplerYcbcrConversion *pYcbcrConversion) -{ - *pYcbcrConversion = VK_NULL_HANDLE; - return VK_SUCCESS; -} - -void -tu_DestroySamplerYcbcrConversion(VkDevice device, - VkSamplerYcbcrConversion ycbcrConversion, - const VkAllocationCallbacks *pAllocator) -{ - /* Do nothing. 
*/ -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_device.c b/lib/mesa/src/freedreno/vulkan/tu_device.c deleted file mode 100644 index 901f02486..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_device.c +++ /dev/null @@ -1,2071 +0,0 @@ -/* - * Copyright © 2016 Red Hat. - * Copyright © 2016 Bas Nieuwenhuizen - * - * based in part on anv driver which is: - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include <fcntl.h> -#include <libsync.h> -#include <stdbool.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/sysinfo.h> -#include <unistd.h> -#include <xf86drm.h> - -#include "compiler/glsl_types.h" -#include "util/debug.h" -#include "util/disk_cache.h" -#include "vk_format.h" -#include "vk_util.h" - -#include "drm-uapi/msm_drm.h" - -static int -tu_device_get_cache_uuid(uint16_t family, void *uuid) -{ - uint32_t mesa_timestamp; - uint16_t f = family; - memset(uuid, 0, VK_UUID_SIZE); - if (!disk_cache_get_function_timestamp(tu_device_get_cache_uuid, - &mesa_timestamp)) - return -1; - - memcpy(uuid, &mesa_timestamp, 4); - memcpy((char *) uuid + 4, &f, 2); - snprintf((char *) uuid + 6, VK_UUID_SIZE - 10, "tu"); - return 0; -} - -static void -tu_get_driver_uuid(void *uuid) -{ - memset(uuid, 0, VK_UUID_SIZE); - snprintf(uuid, VK_UUID_SIZE, "freedreno"); -} - -static void -tu_get_device_uuid(void *uuid) -{ - memset(uuid, 0, VK_UUID_SIZE); -} - -static VkResult -tu_bo_init(struct tu_device *dev, - struct tu_bo *bo, - uint32_t gem_handle, - uint64_t size) -{ - uint64_t iova = tu_gem_info_iova(dev, gem_handle); - if (!iova) - return VK_ERROR_OUT_OF_DEVICE_MEMORY; - - *bo = (struct tu_bo) { - .gem_handle = gem_handle, - .size = size, - .iova = iova, - }; - - return VK_SUCCESS; -} - -VkResult -tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size) -{ - /* TODO: Choose better flags. As of 2018-11-12, freedreno/drm/msm_bo.c - * always sets `flags = MSM_BO_WC`, and we copy that behavior here. 
- */ - uint32_t gem_handle = tu_gem_new(dev, size, MSM_BO_WC); - if (!gem_handle) - return vk_error(dev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - VkResult result = tu_bo_init(dev, bo, gem_handle, size); - if (result != VK_SUCCESS) { - tu_gem_close(dev, gem_handle); - return vk_error(dev->instance, result); - } - - return VK_SUCCESS; -} - -VkResult -tu_bo_init_dmabuf(struct tu_device *dev, - struct tu_bo *bo, - uint64_t size, - int fd) -{ - uint32_t gem_handle = tu_gem_import_dmabuf(dev, fd, size); - if (!gem_handle) - return vk_error(dev->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE); - - VkResult result = tu_bo_init(dev, bo, gem_handle, size); - if (result != VK_SUCCESS) { - tu_gem_close(dev, gem_handle); - return vk_error(dev->instance, result); - } - - return VK_SUCCESS; -} - -int -tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo) -{ - return tu_gem_export_dmabuf(dev, bo->gem_handle); -} - -VkResult -tu_bo_map(struct tu_device *dev, struct tu_bo *bo) -{ - if (bo->map) - return VK_SUCCESS; - - uint64_t offset = tu_gem_info_offset(dev, bo->gem_handle); - if (!offset) - return vk_error(dev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - /* TODO: Should we use the wrapper os_mmap() like Freedreno does? */ - void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - dev->physical_device->local_fd, offset); - if (map == MAP_FAILED) - return vk_error(dev->instance, VK_ERROR_MEMORY_MAP_FAILED); - - bo->map = map; - return VK_SUCCESS; -} - -void -tu_bo_finish(struct tu_device *dev, struct tu_bo *bo) -{ - assert(bo->gem_handle); - - if (bo->map) - munmap(bo->map, bo->size); - - tu_gem_close(dev, bo->gem_handle); -} - -static VkResult -tu_physical_device_init(struct tu_physical_device *device, - struct tu_instance *instance, - drmDevicePtr drm_device) -{ - const char *path = drm_device->nodes[DRM_NODE_RENDER]; - VkResult result = VK_SUCCESS; - drmVersionPtr version; - int fd; - int master_fd = -1; - - fd = open(path, O_RDWR | O_CLOEXEC); - if (fd < 0) { - return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "failed to open device %s", path); - } - - /* Version 1.3 added MSM_INFO_IOVA. */ - const int min_version_major = 1; - const int min_version_minor = 3; - - version = drmGetVersion(fd); - if (!version) { - close(fd); - return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "failed to query kernel driver version for device %s", - path); - } - - if (strcmp(version->name, "msm")) { - drmFreeVersion(version); - close(fd); - return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "device %s does not use the msm kernel driver", path); - } - - if (version->version_major != min_version_major || - version->version_minor < min_version_minor) { - result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "kernel driver for device %s has version %d.%d, " - "but Vulkan requires version >= %d.%d", - path, version->version_major, version->version_minor, - min_version_major, min_version_minor); - drmFreeVersion(version); - close(fd); - return result; - } - - drmFreeVersion(version); - - if (instance->debug_flags & TU_DEBUG_STARTUP) - tu_logi("Found compatible device '%s'.", path); - - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - device->instance = instance; - assert(strlen(path) < ARRAY_SIZE(device->path)); - strncpy(device->path, path, ARRAY_SIZE(device->path)); - - if (instance->enabled_extensions.KHR_display) { - master_fd = - open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC); - if (master_fd >= 0) { - /* TODO: free master_fd is accel is not working? 
*/ - } - } - - device->master_fd = master_fd; - device->local_fd = fd; - - if (tu_drm_get_gpu_id(device, &device->gpu_id)) { - if (instance->debug_flags & TU_DEBUG_STARTUP) - tu_logi("Could not query the GPU ID"); - result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, - "could not get GPU ID"); - goto fail; - } - - if (tu_drm_get_gmem_size(device, &device->gmem_size)) { - if (instance->debug_flags & TU_DEBUG_STARTUP) - tu_logi("Could not query the GMEM size"); - result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, - "could not get GMEM size"); - goto fail; - } - - memset(device->name, 0, sizeof(device->name)); - sprintf(device->name, "FD%d", device->gpu_id); - - switch (device->gpu_id) { - case 630: - device->tile_align_w = 32; - device->tile_align_h = 32; - break; - default: - result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, - "device %s is unsupported", device->name); - goto fail; - } - if (tu_device_get_cache_uuid(device->gpu_id, device->cache_uuid)) { - result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, - "cannot generate UUID"); - goto fail; - } - - /* The gpu id is already embedded in the uuid so we just pass "tu" - * when creating the cache. - */ - char buf[VK_UUID_SIZE * 2 + 1]; - disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2); - device->disk_cache = disk_cache_create(device->name, buf, 0); - - fprintf(stderr, "WARNING: tu is not a conformant vulkan implementation, " - "testing use only.\n"); - - tu_get_driver_uuid(&device->device_uuid); - tu_get_device_uuid(&device->device_uuid); - - tu_fill_device_extension_table(device, &device->supported_extensions); - - if (result != VK_SUCCESS) { - vk_error(instance, result); - goto fail; - } - - result = tu_wsi_init(device); - if (result != VK_SUCCESS) { - vk_error(instance, result); - goto fail; - } - - return VK_SUCCESS; - -fail: - close(fd); - if (master_fd != -1) - close(master_fd); - return result; -} - -static void -tu_physical_device_finish(struct tu_physical_device *device) -{ - tu_wsi_finish(device); - - disk_cache_destroy(device->disk_cache); - close(device->local_fd); - if (device->master_fd != -1) - close(device->master_fd); -} - -static void * -default_alloc_func(void *pUserData, - size_t size, - size_t align, - VkSystemAllocationScope allocationScope) -{ - return malloc(size); -} - -static void * -default_realloc_func(void *pUserData, - void *pOriginal, - size_t size, - size_t align, - VkSystemAllocationScope allocationScope) -{ - return realloc(pOriginal, size); -} - -static void -default_free_func(void *pUserData, void *pMemory) -{ - free(pMemory); -} - -static const VkAllocationCallbacks default_alloc = { - .pUserData = NULL, - .pfnAllocation = default_alloc_func, - .pfnReallocation = default_realloc_func, - .pfnFree = default_free_func, -}; - -static const struct debug_control tu_debug_options[] = { - { "startup", TU_DEBUG_STARTUP }, - { "nir", TU_DEBUG_NIR }, - { "ir3", TU_DEBUG_IR3 }, - { NULL, 0 } -}; - -const char * -tu_get_debug_option_name(int id) -{ - assert(id < ARRAY_SIZE(tu_debug_options) - 1); - return tu_debug_options[id].string; -} - -static int -tu_get_instance_extension_index(const char *name) -{ - for (unsigned i = 0; i < TU_INSTANCE_EXTENSION_COUNT; ++i) { - if (strcmp(name, tu_instance_extensions[i].extensionName) == 0) - return i; - } - return -1; -} - -VkResult -tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkInstance *pInstance) -{ - struct tu_instance *instance; - VkResult result; - - 
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - - uint32_t client_version; - if (pCreateInfo->pApplicationInfo && - pCreateInfo->pApplicationInfo->apiVersion != 0) { - client_version = pCreateInfo->pApplicationInfo->apiVersion; - } else { - tu_EnumerateInstanceVersion(&client_version); - } - - instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!instance) - return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - - instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - - if (pAllocator) - instance->alloc = *pAllocator; - else - instance->alloc = default_alloc; - - instance->api_version = client_version; - instance->physical_device_count = -1; - - instance->debug_flags = - parse_debug_string(getenv("TU_DEBUG"), tu_debug_options); - - if (instance->debug_flags & TU_DEBUG_STARTUP) - tu_logi("Created an instance"); - - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; - int index = tu_get_instance_extension_index(ext_name); - - if (index < 0 || !tu_supported_instance_extensions.extensions[index]) { - vk_free2(&default_alloc, pAllocator, instance); - return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT); - } - - instance->enabled_extensions.extensions[index] = true; - } - - result = vk_debug_report_instance_init(&instance->debug_report_callbacks); - if (result != VK_SUCCESS) { - vk_free2(&default_alloc, pAllocator, instance); - return vk_error(instance, result); - } - - glsl_type_singleton_init_or_ref(); - - VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); - - *pInstance = tu_instance_to_handle(instance); - - return VK_SUCCESS; -} - -void -tu_DestroyInstance(VkInstance _instance, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - - if (!instance) - return; - - for (int i = 0; i < instance->physical_device_count; ++i) { - tu_physical_device_finish(instance->physical_devices + i); - } - - VG(VALGRIND_DESTROY_MEMPOOL(instance)); - - glsl_type_singleton_decref(); - - vk_debug_report_instance_destroy(&instance->debug_report_callbacks); - - vk_free(&instance->alloc, instance); -} - -static VkResult -tu_enumerate_devices(struct tu_instance *instance) -{ - /* TODO: Check for more devices ? 
*/ - drmDevicePtr devices[8]; - VkResult result = VK_ERROR_INCOMPATIBLE_DRIVER; - int max_devices; - - instance->physical_device_count = 0; - - max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices)); - - if (instance->debug_flags & TU_DEBUG_STARTUP) - tu_logi("Found %d drm nodes", max_devices); - - if (max_devices < 1) - return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); - - for (unsigned i = 0; i < (unsigned) max_devices; i++) { - if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && - devices[i]->bustype == DRM_BUS_PLATFORM) { - - result = tu_physical_device_init( - instance->physical_devices + instance->physical_device_count, - instance, devices[i]); - if (result == VK_SUCCESS) - ++instance->physical_device_count; - else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) - break; - } - } - drmFreeDevices(devices, max_devices); - - return result; -} - -VkResult -tu_EnumeratePhysicalDevices(VkInstance _instance, - uint32_t *pPhysicalDeviceCount, - VkPhysicalDevice *pPhysicalDevices) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount); - - VkResult result; - - if (instance->physical_device_count < 0) { - result = tu_enumerate_devices(instance); - if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER) - return result; - } - - for (uint32_t i = 0; i < instance->physical_device_count; ++i) { - vk_outarray_append(&out, p) - { - *p = tu_physical_device_to_handle(instance->physical_devices + i); - } - } - - return vk_outarray_status(&out); -} - -VkResult -tu_EnumeratePhysicalDeviceGroups( - VkInstance _instance, - uint32_t *pPhysicalDeviceGroupCount, - VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties, - pPhysicalDeviceGroupCount); - VkResult result; - - if (instance->physical_device_count < 0) { - result = tu_enumerate_devices(instance); - if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER) - return result; - } - - for (uint32_t i = 0; i < instance->physical_device_count; ++i) { - vk_outarray_append(&out, p) - { - p->physicalDeviceCount = 1; - p->physicalDevices[0] = - tu_physical_device_to_handle(instance->physical_devices + i); - p->subsetAllocation = false; - } - } - - return vk_outarray_status(&out); -} - -void -tu_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures *pFeatures) -{ - memset(pFeatures, 0, sizeof(*pFeatures)); - - *pFeatures = (VkPhysicalDeviceFeatures) { - .robustBufferAccess = false, - .fullDrawIndexUint32 = false, - .imageCubeArray = false, - .independentBlend = false, - .geometryShader = false, - .tessellationShader = false, - .sampleRateShading = false, - .dualSrcBlend = false, - .logicOp = false, - .multiDrawIndirect = false, - .drawIndirectFirstInstance = false, - .depthClamp = false, - .depthBiasClamp = false, - .fillModeNonSolid = false, - .depthBounds = false, - .wideLines = false, - .largePoints = false, - .alphaToOne = false, - .multiViewport = false, - .samplerAnisotropy = false, - .textureCompressionETC2 = false, - .textureCompressionASTC_LDR = false, - .textureCompressionBC = false, - .occlusionQueryPrecise = false, - .pipelineStatisticsQuery = false, - .vertexPipelineStoresAndAtomics = false, - .fragmentStoresAndAtomics = false, - .shaderTessellationAndGeometryPointSize = false, - .shaderImageGatherExtended = false, - .shaderStorageImageExtendedFormats = false, - .shaderStorageImageMultisample = 
false, - .shaderUniformBufferArrayDynamicIndexing = false, - .shaderSampledImageArrayDynamicIndexing = false, - .shaderStorageBufferArrayDynamicIndexing = false, - .shaderStorageImageArrayDynamicIndexing = false, - .shaderStorageImageReadWithoutFormat = false, - .shaderStorageImageWriteWithoutFormat = false, - .shaderClipDistance = false, - .shaderCullDistance = false, - .shaderFloat64 = false, - .shaderInt64 = false, - .shaderInt16 = false, - .sparseBinding = false, - .variableMultisampleRate = false, - .inheritedQueries = false, - }; -} - -void -tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceFeatures2 *pFeatures) -{ - vk_foreach_struct(ext, pFeatures->pNext) - { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: { - VkPhysicalDeviceVariablePointersFeatures *features = (void *) ext; - features->variablePointersStorageBuffer = false; - features->variablePointers = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: { - VkPhysicalDeviceMultiviewFeatures *features = - (VkPhysicalDeviceMultiviewFeatures *) ext; - features->multiview = false; - features->multiviewGeometryShader = false; - features->multiviewTessellationShader = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: { - VkPhysicalDeviceShaderDrawParametersFeatures *features = - (VkPhysicalDeviceShaderDrawParametersFeatures *) ext; - features->shaderDrawParameters = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: { - VkPhysicalDeviceProtectedMemoryFeatures *features = - (VkPhysicalDeviceProtectedMemoryFeatures *) ext; - features->protectedMemory = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: { - VkPhysicalDevice16BitStorageFeatures *features = - (VkPhysicalDevice16BitStorageFeatures *) ext; - features->storageBuffer16BitAccess = false; - features->uniformAndStorageBuffer16BitAccess = false; - features->storagePushConstant16 = false; - features->storageInputOutput16 = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: { - VkPhysicalDeviceSamplerYcbcrConversionFeatures *features = - (VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext; - features->samplerYcbcrConversion = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: { - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features = - (VkPhysicalDeviceDescriptorIndexingFeaturesEXT *) ext; - features->shaderInputAttachmentArrayDynamicIndexing = false; - features->shaderUniformTexelBufferArrayDynamicIndexing = false; - features->shaderStorageTexelBufferArrayDynamicIndexing = false; - features->shaderUniformBufferArrayNonUniformIndexing = false; - features->shaderSampledImageArrayNonUniformIndexing = false; - features->shaderStorageBufferArrayNonUniformIndexing = false; - features->shaderStorageImageArrayNonUniformIndexing = false; - features->shaderInputAttachmentArrayNonUniformIndexing = false; - features->shaderUniformTexelBufferArrayNonUniformIndexing = false; - features->shaderStorageTexelBufferArrayNonUniformIndexing = false; - features->descriptorBindingUniformBufferUpdateAfterBind = false; - features->descriptorBindingSampledImageUpdateAfterBind = false; - features->descriptorBindingStorageImageUpdateAfterBind = false; - features->descriptorBindingStorageBufferUpdateAfterBind = false; - features->descriptorBindingUniformTexelBufferUpdateAfterBind = false; - 
features->descriptorBindingStorageTexelBufferUpdateAfterBind = false; - features->descriptorBindingUpdateUnusedWhilePending = false; - features->descriptorBindingPartiallyBound = false; - features->descriptorBindingVariableDescriptorCount = false; - features->runtimeDescriptorArray = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: { - VkPhysicalDeviceConditionalRenderingFeaturesEXT *features = - (VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext; - features->conditionalRendering = false; - features->inheritedConditionalRendering = false; - break; - } - default: - break; - } - } - return tu_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features); -} - -void -tu_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties *pProperties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); - VkSampleCountFlags sample_counts = 0xf; - - /* make sure that the entire descriptor set is addressable with a signed - * 32-bit int. So the sum of all limits scaled by descriptor size has to - * be at most 2 GiB. the combined image & samples object count as one of - * both. This limit is for the pipeline layout, not for the set layout, but - * there is no set limit, so we just set a pipeline limit. I don't think - * any app is going to hit this soon. */ - size_t max_descriptor_set_size = - ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS) / - (32 /* uniform buffer, 32 due to potential space wasted on alignment */ + - 32 /* storage buffer, 32 due to potential space wasted on alignment */ + - 32 /* sampler, largest when combined with image */ + - 64 /* sampled image */ + 64 /* storage image */); - - VkPhysicalDeviceLimits limits = { - .maxImageDimension1D = (1 << 14), - .maxImageDimension2D = (1 << 14), - .maxImageDimension3D = (1 << 11), - .maxImageDimensionCube = (1 << 14), - .maxImageArrayLayers = (1 << 11), - .maxTexelBufferElements = 128 * 1024 * 1024, - .maxUniformBufferRange = UINT32_MAX, - .maxStorageBufferRange = UINT32_MAX, - .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, - .maxMemoryAllocationCount = UINT32_MAX, - .maxSamplerAllocationCount = 64 * 1024, - .bufferImageGranularity = 64, /* A cache line */ - .sparseAddressSpaceSize = 0xffffffffu, /* buffer max size */ - .maxBoundDescriptorSets = MAX_SETS, - .maxPerStageDescriptorSamplers = max_descriptor_set_size, - .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size, - .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size, - .maxPerStageDescriptorSampledImages = max_descriptor_set_size, - .maxPerStageDescriptorStorageImages = max_descriptor_set_size, - .maxPerStageDescriptorInputAttachments = max_descriptor_set_size, - .maxPerStageResources = max_descriptor_set_size, - .maxDescriptorSetSamplers = max_descriptor_set_size, - .maxDescriptorSetUniformBuffers = max_descriptor_set_size, - .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS, - .maxDescriptorSetStorageBuffers = max_descriptor_set_size, - .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS, - .maxDescriptorSetSampledImages = max_descriptor_set_size, - .maxDescriptorSetStorageImages = max_descriptor_set_size, - .maxDescriptorSetInputAttachments = max_descriptor_set_size, - .maxVertexInputAttributes = 32, - .maxVertexInputBindings = 32, - .maxVertexInputAttributeOffset = 2047, - .maxVertexInputBindingStride = 2048, - .maxVertexOutputComponents = 128, - .maxTessellationGenerationLevel = 64, - .maxTessellationPatchSize = 32, - 
.maxTessellationControlPerVertexInputComponents = 128, - .maxTessellationControlPerVertexOutputComponents = 128, - .maxTessellationControlPerPatchOutputComponents = 120, - .maxTessellationControlTotalOutputComponents = 4096, - .maxTessellationEvaluationInputComponents = 128, - .maxTessellationEvaluationOutputComponents = 128, - .maxGeometryShaderInvocations = 127, - .maxGeometryInputComponents = 64, - .maxGeometryOutputComponents = 128, - .maxGeometryOutputVertices = 256, - .maxGeometryTotalOutputComponents = 1024, - .maxFragmentInputComponents = 128, - .maxFragmentOutputAttachments = 8, - .maxFragmentDualSrcAttachments = 1, - .maxFragmentCombinedOutputResources = 8, - .maxComputeSharedMemorySize = 32768, - .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, - .maxComputeWorkGroupInvocations = 2048, - .maxComputeWorkGroupSize = { 2048, 2048, 2048 }, - .subPixelPrecisionBits = 4 /* FIXME */, - .subTexelPrecisionBits = 4 /* FIXME */, - .mipmapPrecisionBits = 4 /* FIXME */, - .maxDrawIndexedIndexValue = UINT32_MAX, - .maxDrawIndirectCount = UINT32_MAX, - .maxSamplerLodBias = 16, - .maxSamplerAnisotropy = 16, - .maxViewports = MAX_VIEWPORTS, - .maxViewportDimensions = { (1 << 14), (1 << 14) }, - .viewportBoundsRange = { INT16_MIN, INT16_MAX }, - .viewportSubPixelBits = 8, - .minMemoryMapAlignment = 4096, /* A page */ - .minTexelBufferOffsetAlignment = 1, - .minUniformBufferOffsetAlignment = 4, - .minStorageBufferOffsetAlignment = 4, - .minTexelOffset = -32, - .maxTexelOffset = 31, - .minTexelGatherOffset = -32, - .maxTexelGatherOffset = 31, - .minInterpolationOffset = -2, - .maxInterpolationOffset = 2, - .subPixelInterpolationOffsetBits = 8, - .maxFramebufferWidth = (1 << 14), - .maxFramebufferHeight = (1 << 14), - .maxFramebufferLayers = (1 << 10), - .framebufferColorSampleCounts = sample_counts, - .framebufferDepthSampleCounts = sample_counts, - .framebufferStencilSampleCounts = sample_counts, - .framebufferNoAttachmentsSampleCounts = sample_counts, - .maxColorAttachments = MAX_RTS, - .sampledImageColorSampleCounts = sample_counts, - .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .sampledImageDepthSampleCounts = sample_counts, - .sampledImageStencilSampleCounts = sample_counts, - .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, - .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = true, - .timestampPeriod = 1, - .maxClipDistances = 8, - .maxCullDistances = 8, - .maxCombinedClipAndCullDistances = 8, - .discreteQueuePriorities = 1, - .pointSizeRange = { 0.125, 255.875 }, - .lineWidthRange = { 0.0, 7.9921875 }, - .pointSizeGranularity = (1.0 / 8.0), - .lineWidthGranularity = (1.0 / 128.0), - .strictLines = false, /* FINISHME */ - .standardSampleLocations = true, - .optimalBufferCopyOffsetAlignment = 128, - .optimalBufferCopyRowPitchAlignment = 128, - .nonCoherentAtomSize = 64, - }; - - *pProperties = (VkPhysicalDeviceProperties) { - .apiVersion = tu_physical_device_api_version(pdevice), - .driverVersion = vk_get_driver_version(), - .vendorID = 0, /* TODO */ - .deviceID = 0, - .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, - .limits = limits, - .sparseProperties = { 0 }, - }; - - strcpy(pProperties->deviceName, pdevice->name); - memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE); -} - -void -tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, - VkPhysicalDeviceProperties2 *pProperties) -{ - TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice); - tu_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties); - - 
vk_foreach_struct(ext, pProperties->pNext) - { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: { - VkPhysicalDevicePushDescriptorPropertiesKHR *properties = - (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext; - properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: { - VkPhysicalDeviceIDProperties *properties = - (VkPhysicalDeviceIDProperties *) ext; - memcpy(properties->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE); - memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE); - properties->deviceLUIDValid = false; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { - VkPhysicalDeviceMultiviewProperties *properties = - (VkPhysicalDeviceMultiviewProperties *) ext; - properties->maxMultiviewViewCount = MAX_VIEWS; - properties->maxMultiviewInstanceIndex = INT_MAX; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { - VkPhysicalDevicePointClippingProperties *properties = - (VkPhysicalDevicePointClippingProperties *) ext; - properties->pointClippingBehavior = - VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; - break; - } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { - VkPhysicalDeviceMaintenance3Properties *properties = - (VkPhysicalDeviceMaintenance3Properties *) ext; - /* Make sure everything is addressable by a signed 32-bit int, and - * our largest descriptors are 96 bytes. */ - properties->maxPerSetDescriptors = (1ull << 31) / 96; - /* Our buffer size fields allow only this much */ - properties->maxMemoryAllocationSize = 0xFFFFFFFFull; - break; - } - default: - break; - } - } -} - -static const VkQueueFamilyProperties tu_queue_family_properties = { - .queueFlags = - VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, - .queueCount = 1, - .timestampValidBits = 64, - .minImageTransferGranularity = { 1, 1, 1 }, -}; - -void -tu_GetPhysicalDeviceQueueFamilyProperties( - VkPhysicalDevice physicalDevice, - uint32_t *pQueueFamilyPropertyCount, - VkQueueFamilyProperties *pQueueFamilyProperties) -{ - VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); - - vk_outarray_append(&out, p) { *p = tu_queue_family_properties; } -} - -void -tu_GetPhysicalDeviceQueueFamilyProperties2( - VkPhysicalDevice physicalDevice, - uint32_t *pQueueFamilyPropertyCount, - VkQueueFamilyProperties2 *pQueueFamilyProperties) -{ - VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount); - - vk_outarray_append(&out, p) - { - p->queueFamilyProperties = tu_queue_family_properties; - } -} - -static uint64_t -tu_get_system_heap_size() -{ - struct sysinfo info; - sysinfo(&info); - - uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit; - - /* We don't want to burn too much ram with the GPU. If the user has 4GiB - * or less, we use at most half. If they have more than 4GiB, we use 3/4. 
- */ - uint64_t available_ram; - if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull) - available_ram = total_ram / 2; - else - available_ram = total_ram * 3 / 4; - - return available_ram; -} - -void -tu_GetPhysicalDeviceMemoryProperties( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties *pMemoryProperties) -{ - pMemoryProperties->memoryHeapCount = 1; - pMemoryProperties->memoryHeaps[0].size = tu_get_system_heap_size(); - pMemoryProperties->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; - - pMemoryProperties->memoryTypeCount = 1; - pMemoryProperties->memoryTypes[0].propertyFlags = - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - pMemoryProperties->memoryTypes[0].heapIndex = 0; -} - -void -tu_GetPhysicalDeviceMemoryProperties2( - VkPhysicalDevice physicalDevice, - VkPhysicalDeviceMemoryProperties2 *pMemoryProperties) -{ - return tu_GetPhysicalDeviceMemoryProperties( - physicalDevice, &pMemoryProperties->memoryProperties); -} - -static VkResult -tu_queue_init(struct tu_device *device, - struct tu_queue *queue, - uint32_t queue_family_index, - int idx, - VkDeviceQueueCreateFlags flags) -{ - queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - queue->device = device; - queue->queue_family_index = queue_family_index; - queue->queue_idx = idx; - queue->flags = flags; - - int ret = tu_drm_submitqueue_new(device, 0, &queue->msm_queue_id); - if (ret) - return VK_ERROR_INITIALIZATION_FAILED; - - tu_fence_init(&queue->submit_fence, false); - - return VK_SUCCESS; -} - -static void -tu_queue_finish(struct tu_queue *queue) -{ - tu_fence_finish(&queue->submit_fence); - tu_drm_submitqueue_close(queue->device, queue->msm_queue_id); -} - -static int -tu_get_device_extension_index(const char *name) -{ - for (unsigned i = 0; i < TU_DEVICE_EXTENSION_COUNT; ++i) { - if (strcmp(name, tu_device_extensions[i].extensionName) == 0) - return i; - } - return -1; -} - -VkResult -tu_CreateDevice(VkPhysicalDevice physicalDevice, - const VkDeviceCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDevice *pDevice) -{ - TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); - VkResult result; - struct tu_device *device; - - /* Check enabled features */ - if (pCreateInfo->pEnabledFeatures) { - VkPhysicalDeviceFeatures supported_features; - tu_GetPhysicalDeviceFeatures(physicalDevice, &supported_features); - VkBool32 *supported_feature = (VkBool32 *) &supported_features; - VkBool32 *enabled_feature = (VkBool32 *) pCreateInfo->pEnabledFeatures; - unsigned num_features = - sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32); - for (uint32_t i = 0; i < num_features; i++) { - if (enabled_feature[i] && !supported_feature[i]) - return vk_error(physical_device->instance, - VK_ERROR_FEATURE_NOT_PRESENT); - } - } - - device = vk_zalloc2(&physical_device->instance->alloc, pAllocator, - sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!device) - return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; - device->instance = physical_device->instance; - device->physical_device = physical_device; - - if (pAllocator) - device->alloc = *pAllocator; - else - device->alloc = physical_device->instance->alloc; - - for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { - const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i]; - int index = tu_get_device_extension_index(ext_name); - if (index < 0 || - 
!physical_device->supported_extensions.extensions[index]) { - vk_free(&device->alloc, device); - return vk_error(physical_device->instance, - VK_ERROR_EXTENSION_NOT_PRESENT); - } - - device->enabled_extensions.extensions[index] = true; - } - - for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { - const VkDeviceQueueCreateInfo *queue_create = - &pCreateInfo->pQueueCreateInfos[i]; - uint32_t qfi = queue_create->queueFamilyIndex; - device->queues[qfi] = vk_alloc( - &device->alloc, queue_create->queueCount * sizeof(struct tu_queue), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!device->queues[qfi]) { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - goto fail; - } - - memset(device->queues[qfi], 0, - queue_create->queueCount * sizeof(struct tu_queue)); - - device->queue_count[qfi] = queue_create->queueCount; - - for (unsigned q = 0; q < queue_create->queueCount; q++) { - result = tu_queue_init(device, &device->queues[qfi][q], qfi, q, - queue_create->flags); - if (result != VK_SUCCESS) - goto fail; - } - } - - device->compiler = ir3_compiler_create(NULL, physical_device->gpu_id); - if (!device->compiler) - goto fail; - - VkPipelineCacheCreateInfo ci; - ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO; - ci.pNext = NULL; - ci.flags = 0; - ci.pInitialData = NULL; - ci.initialDataSize = 0; - VkPipelineCache pc; - result = - tu_CreatePipelineCache(tu_device_to_handle(device), &ci, NULL, &pc); - if (result != VK_SUCCESS) - goto fail; - - device->mem_cache = tu_pipeline_cache_from_handle(pc); - - *pDevice = tu_device_to_handle(device); - return VK_SUCCESS; - -fail: - for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { - for (unsigned q = 0; q < device->queue_count[i]; q++) - tu_queue_finish(&device->queues[i][q]); - if (device->queue_count[i]) - vk_free(&device->alloc, device->queues[i]); - } - - if (device->compiler) - ralloc_free(device->compiler); - - vk_free(&device->alloc, device); - return result; -} - -void -tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - if (!device) - return; - - for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { - for (unsigned q = 0; q < device->queue_count[i]; q++) - tu_queue_finish(&device->queues[i][q]); - if (device->queue_count[i]) - vk_free(&device->alloc, device->queues[i]); - } - - /* the compiler does not use pAllocator */ - ralloc_free(device->compiler); - - VkPipelineCache pc = tu_pipeline_cache_to_handle(device->mem_cache); - tu_DestroyPipelineCache(tu_device_to_handle(device), pc, NULL); - - vk_free(&device->alloc, device); -} - -VkResult -tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, - VkLayerProperties *pProperties) -{ - *pPropertyCount = 0; - return VK_SUCCESS; -} - -VkResult -tu_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, - uint32_t *pPropertyCount, - VkLayerProperties *pProperties) -{ - *pPropertyCount = 0; - return VK_SUCCESS; -} - -void -tu_GetDeviceQueue2(VkDevice _device, - const VkDeviceQueueInfo2 *pQueueInfo, - VkQueue *pQueue) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_queue *queue; - - queue = - &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex]; - if (pQueueInfo->flags != queue->flags) { - /* From the Vulkan 1.1.70 spec: - * - * "The queue returned by vkGetDeviceQueue2 must have the same - * flags value from this structure as that used at device - * creation time in a VkDeviceQueueCreateInfo instance. 
If no - * matching flags were specified at device creation time then - * pQueue will return VK_NULL_HANDLE." - */ - *pQueue = VK_NULL_HANDLE; - return; - } - - *pQueue = tu_queue_to_handle(queue); -} - -void -tu_GetDeviceQueue(VkDevice _device, - uint32_t queueFamilyIndex, - uint32_t queueIndex, - VkQueue *pQueue) -{ - const VkDeviceQueueInfo2 info = - (VkDeviceQueueInfo2) { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2, - .queueFamilyIndex = queueFamilyIndex, - .queueIndex = queueIndex }; - - tu_GetDeviceQueue2(_device, &info, pQueue); -} - -VkResult -tu_QueueSubmit(VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo *pSubmits, - VkFence _fence) -{ - TU_FROM_HANDLE(tu_queue, queue, _queue); - - for (uint32_t i = 0; i < submitCount; ++i) { - const VkSubmitInfo *submit = pSubmits + i; - const bool last_submit = (i == submitCount - 1); - struct tu_bo_list bo_list; - tu_bo_list_init(&bo_list); - - uint32_t entry_count = 0; - for (uint32_t j = 0; j < submit->commandBufferCount; ++j) { - TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]); - entry_count += cmdbuf->cs.entry_count; - } - - struct drm_msm_gem_submit_cmd cmds[entry_count]; - uint32_t entry_idx = 0; - for (uint32_t j = 0; j < submit->commandBufferCount; ++j) { - TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]); - struct tu_cs *cs = &cmdbuf->cs; - for (unsigned i = 0; i < cs->entry_count; ++i, ++entry_idx) { - cmds[entry_idx].type = MSM_SUBMIT_CMD_BUF; - cmds[entry_idx].submit_idx = - tu_bo_list_add(&bo_list, cs->entries[i].bo, - MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_DUMP); - cmds[entry_idx].submit_offset = cs->entries[i].offset; - cmds[entry_idx].size = cs->entries[i].size; - cmds[entry_idx].pad = 0; - cmds[entry_idx].nr_relocs = 0; - cmds[entry_idx].relocs = 0; - } - - tu_bo_list_merge(&bo_list, &cmdbuf->bo_list); - } - - uint32_t flags = MSM_PIPE_3D0; - if (last_submit) { - flags |= MSM_SUBMIT_FENCE_FD_OUT; - } - - struct drm_msm_gem_submit req = { - .flags = flags, - .queueid = queue->msm_queue_id, - .bos = (uint64_t)(uintptr_t) bo_list.bo_infos, - .nr_bos = bo_list.count, - .cmds = (uint64_t)(uintptr_t)cmds, - .nr_cmds = entry_count, - }; - - int ret = drmCommandWriteRead(queue->device->physical_device->local_fd, - DRM_MSM_GEM_SUBMIT, - &req, sizeof(req)); - if (ret) { - fprintf(stderr, "submit failed: %s\n", strerror(errno)); - abort(); - } - - tu_bo_list_destroy(&bo_list); - - if (last_submit) { - /* no need to merge fences as queue execution is serialized */ - tu_fence_update_fd(&queue->submit_fence, req.fence_fd); - } - } - - if (_fence != VK_NULL_HANDLE) { - TU_FROM_HANDLE(tu_fence, fence, _fence); - tu_fence_copy(fence, &queue->submit_fence); - } - - return VK_SUCCESS; -} - -VkResult -tu_QueueWaitIdle(VkQueue _queue) -{ - TU_FROM_HANDLE(tu_queue, queue, _queue); - - tu_fence_wait_idle(&queue->submit_fence); - - return VK_SUCCESS; -} - -VkResult -tu_DeviceWaitIdle(VkDevice _device) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) { - for (unsigned q = 0; q < device->queue_count[i]; q++) { - tu_QueueWaitIdle(tu_queue_to_handle(&device->queues[i][q])); - } - } - return VK_SUCCESS; -} - -VkResult -tu_EnumerateInstanceExtensionProperties(const char *pLayerName, - uint32_t *pPropertyCount, - VkExtensionProperties *pProperties) -{ - VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); - - /* We spport no lyaers */ - if (pLayerName) - return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); - - for (int i = 0; i < TU_INSTANCE_EXTENSION_COUNT; 
i++) { - if (tu_supported_instance_extensions.extensions[i]) { - vk_outarray_append(&out, prop) { *prop = tu_instance_extensions[i]; } - } - } - - return vk_outarray_status(&out); -} - -VkResult -tu_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, - const char *pLayerName, - uint32_t *pPropertyCount, - VkExtensionProperties *pProperties) -{ - /* We spport no lyaers */ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - VK_OUTARRAY_MAKE(out, pProperties, pPropertyCount); - - /* We spport no lyaers */ - if (pLayerName) - return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); - - for (int i = 0; i < TU_DEVICE_EXTENSION_COUNT; i++) { - if (device->supported_extensions.extensions[i]) { - vk_outarray_append(&out, prop) { *prop = tu_device_extensions[i]; } - } - } - - return vk_outarray_status(&out); -} - -PFN_vkVoidFunction -tu_GetInstanceProcAddr(VkInstance _instance, const char *pName) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - - return tu_lookup_entrypoint_checked( - pName, instance ? instance->api_version : 0, - instance ? &instance->enabled_extensions : NULL, NULL); -} - -/* The loader wants us to expose a second GetInstanceProcAddr function - * to work around certain LD_PRELOAD issues seen in apps. - */ -PUBLIC -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName); - -PUBLIC -VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL -vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) -{ - return tu_GetInstanceProcAddr(instance, pName); -} - -PFN_vkVoidFunction -tu_GetDeviceProcAddr(VkDevice _device, const char *pName) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - return tu_lookup_entrypoint_checked(pName, device->instance->api_version, - &device->instance->enabled_extensions, - &device->enabled_extensions); -} - -static VkResult -tu_alloc_memory(struct tu_device *device, - const VkMemoryAllocateInfo *pAllocateInfo, - const VkAllocationCallbacks *pAllocator, - VkDeviceMemory *pMem) -{ - struct tu_device_memory *mem; - VkResult result; - - assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); - - if (pAllocateInfo->allocationSize == 0) { - /* Apparently, this is allowed */ - *pMem = VK_NULL_HANDLE; - return VK_SUCCESS; - } - - mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (mem == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - const VkImportMemoryFdInfoKHR *fd_info = - vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR); - if (fd_info && !fd_info->handleType) - fd_info = NULL; - - if (fd_info) { - assert(fd_info->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || - fd_info->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); - - /* - * TODO Importing the same fd twice gives us the same handle without - * reference counting. We need to maintain a per-instance handle-to-bo - * table and add reference count to tu_bo. 
- */ - result = tu_bo_init_dmabuf(device, &mem->bo, - pAllocateInfo->allocationSize, fd_info->fd); - if (result == VK_SUCCESS) { - /* take ownership and close the fd */ - close(fd_info->fd); - } - } else { - result = - tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize); - } - - if (result != VK_SUCCESS) { - vk_free2(&device->alloc, pAllocator, mem); - return result; - } - - mem->size = pAllocateInfo->allocationSize; - mem->type_index = pAllocateInfo->memoryTypeIndex; - - mem->map = NULL; - mem->user_ptr = NULL; - - *pMem = tu_device_memory_to_handle(mem); - - return VK_SUCCESS; -} - -VkResult -tu_AllocateMemory(VkDevice _device, - const VkMemoryAllocateInfo *pAllocateInfo, - const VkAllocationCallbacks *pAllocator, - VkDeviceMemory *pMem) -{ - TU_FROM_HANDLE(tu_device, device, _device); - return tu_alloc_memory(device, pAllocateInfo, pAllocator, pMem); -} - -void -tu_FreeMemory(VkDevice _device, - VkDeviceMemory _mem, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_device_memory, mem, _mem); - - if (mem == NULL) - return; - - tu_bo_finish(device, &mem->bo); - vk_free2(&device->alloc, pAllocator, mem); -} - -VkResult -tu_MapMemory(VkDevice _device, - VkDeviceMemory _memory, - VkDeviceSize offset, - VkDeviceSize size, - VkMemoryMapFlags flags, - void **ppData) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_device_memory, mem, _memory); - VkResult result; - - if (mem == NULL) { - *ppData = NULL; - return VK_SUCCESS; - } - - if (mem->user_ptr) { - *ppData = mem->user_ptr; - } else if (!mem->map) { - result = tu_bo_map(device, &mem->bo); - if (result != VK_SUCCESS) - return result; - *ppData = mem->map = mem->bo.map; - } else - *ppData = mem->map; - - if (*ppData) { - *ppData += offset; - return VK_SUCCESS; - } - - return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED); -} - -void -tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory) -{ - /* I do not see any unmapping done by the freedreno Gallium driver. 
*/ -} - -VkResult -tu_FlushMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return VK_SUCCESS; -} - -VkResult -tu_InvalidateMappedMemoryRanges(VkDevice _device, - uint32_t memoryRangeCount, - const VkMappedMemoryRange *pMemoryRanges) -{ - return VK_SUCCESS; -} - -void -tu_GetBufferMemoryRequirements(VkDevice _device, - VkBuffer _buffer, - VkMemoryRequirements *pMemoryRequirements) -{ - TU_FROM_HANDLE(tu_buffer, buffer, _buffer); - - pMemoryRequirements->memoryTypeBits = 1; - pMemoryRequirements->alignment = 16; - pMemoryRequirements->size = - align64(buffer->size, pMemoryRequirements->alignment); -} - -void -tu_GetBufferMemoryRequirements2( - VkDevice device, - const VkBufferMemoryRequirementsInfo2 *pInfo, - VkMemoryRequirements2 *pMemoryRequirements) -{ - tu_GetBufferMemoryRequirements(device, pInfo->buffer, - &pMemoryRequirements->memoryRequirements); -} - -void -tu_GetImageMemoryRequirements(VkDevice _device, - VkImage _image, - VkMemoryRequirements *pMemoryRequirements) -{ - TU_FROM_HANDLE(tu_image, image, _image); - - pMemoryRequirements->memoryTypeBits = 1; - pMemoryRequirements->size = image->size; - pMemoryRequirements->alignment = image->alignment; -} - -void -tu_GetImageMemoryRequirements2(VkDevice device, - const VkImageMemoryRequirementsInfo2 *pInfo, - VkMemoryRequirements2 *pMemoryRequirements) -{ - tu_GetImageMemoryRequirements(device, pInfo->image, - &pMemoryRequirements->memoryRequirements); -} - -void -tu_GetImageSparseMemoryRequirements( - VkDevice device, - VkImage image, - uint32_t *pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements *pSparseMemoryRequirements) -{ - tu_stub(); -} - -void -tu_GetImageSparseMemoryRequirements2( - VkDevice device, - const VkImageSparseMemoryRequirementsInfo2 *pInfo, - uint32_t *pSparseMemoryRequirementCount, - VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) -{ - tu_stub(); -} - -void -tu_GetDeviceMemoryCommitment(VkDevice device, - VkDeviceMemory memory, - VkDeviceSize *pCommittedMemoryInBytes) -{ - *pCommittedMemoryInBytes = 0; -} - -VkResult -tu_BindBufferMemory2(VkDevice device, - uint32_t bindInfoCount, - const VkBindBufferMemoryInfo *pBindInfos) -{ - for (uint32_t i = 0; i < bindInfoCount; ++i) { - TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory); - TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer); - - if (mem) { - buffer->bo = &mem->bo; - buffer->bo_offset = pBindInfos[i].memoryOffset; - } else { - buffer->bo = NULL; - } - } - return VK_SUCCESS; -} - -VkResult -tu_BindBufferMemory(VkDevice device, - VkBuffer buffer, - VkDeviceMemory memory, - VkDeviceSize memoryOffset) -{ - const VkBindBufferMemoryInfo info = { - .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO, - .buffer = buffer, - .memory = memory, - .memoryOffset = memoryOffset - }; - - return tu_BindBufferMemory2(device, 1, &info); -} - -VkResult -tu_BindImageMemory2(VkDevice device, - uint32_t bindInfoCount, - const VkBindImageMemoryInfo *pBindInfos) -{ - for (uint32_t i = 0; i < bindInfoCount; ++i) { - TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image); - TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory); - - if (mem) { - image->bo = &mem->bo; - image->bo_offset = pBindInfos[i].memoryOffset; - } else { - image->bo = NULL; - image->bo_offset = 0; - } - } - - return VK_SUCCESS; -} - -VkResult -tu_BindImageMemory(VkDevice device, - VkImage image, - VkDeviceMemory memory, - VkDeviceSize memoryOffset) -{ - const VkBindImageMemoryInfo info = { - .sType = 
VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, - .image = image, - .memory = memory, - .memoryOffset = memoryOffset - }; - - return tu_BindImageMemory2(device, 1, &info); -} - -VkResult -tu_QueueBindSparse(VkQueue _queue, - uint32_t bindInfoCount, - const VkBindSparseInfo *pBindInfo, - VkFence _fence) -{ - return VK_SUCCESS; -} - -// Queue semaphore functions - -VkResult -tu_CreateSemaphore(VkDevice _device, - const VkSemaphoreCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSemaphore *pSemaphore) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - struct tu_semaphore *sem = - vk_alloc2(&device->alloc, pAllocator, sizeof(*sem), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sem) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - *pSemaphore = tu_semaphore_to_handle(sem); - return VK_SUCCESS; -} - -void -tu_DestroySemaphore(VkDevice _device, - VkSemaphore _semaphore, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_semaphore, sem, _semaphore); - if (!_semaphore) - return; - - vk_free2(&device->alloc, pAllocator, sem); -} - -VkResult -tu_CreateEvent(VkDevice _device, - const VkEventCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkEvent *pEvent) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_event *event = - vk_alloc2(&device->alloc, pAllocator, sizeof(*event), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (!event) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - *pEvent = tu_event_to_handle(event); - - return VK_SUCCESS; -} - -void -tu_DestroyEvent(VkDevice _device, - VkEvent _event, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_event, event, _event); - - if (!event) - return; - vk_free2(&device->alloc, pAllocator, event); -} - -VkResult -tu_GetEventStatus(VkDevice _device, VkEvent _event) -{ - TU_FROM_HANDLE(tu_event, event, _event); - - if (*event->map == 1) - return VK_EVENT_SET; - return VK_EVENT_RESET; -} - -VkResult -tu_SetEvent(VkDevice _device, VkEvent _event) -{ - TU_FROM_HANDLE(tu_event, event, _event); - *event->map = 1; - - return VK_SUCCESS; -} - -VkResult -tu_ResetEvent(VkDevice _device, VkEvent _event) -{ - TU_FROM_HANDLE(tu_event, event, _event); - *event->map = 0; - - return VK_SUCCESS; -} - -VkResult -tu_CreateBuffer(VkDevice _device, - const VkBufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBuffer *pBuffer) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_buffer *buffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO); - - buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (buffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - buffer->size = pCreateInfo->size; - buffer->usage = pCreateInfo->usage; - buffer->flags = pCreateInfo->flags; - - *pBuffer = tu_buffer_to_handle(buffer); - - return VK_SUCCESS; -} - -void -tu_DestroyBuffer(VkDevice _device, - VkBuffer _buffer, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_buffer, buffer, _buffer); - - if (!buffer) - return; - - vk_free2(&device->alloc, pAllocator, buffer); -} - -static uint32_t -tu_surface_max_layer_count(struct tu_image_view *iview) -{ - return iview->type == VK_IMAGE_VIEW_TYPE_3D - ?
iview->extent.depth - : (iview->base_layer + iview->layer_count); -} - -VkResult -tu_CreateFramebuffer(VkDevice _device, - const VkFramebufferCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkFramebuffer *pFramebuffer) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_framebuffer *framebuffer; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO); - - size_t size = sizeof(*framebuffer) + sizeof(struct tu_attachment_info) * - pCreateInfo->attachmentCount; - framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (framebuffer == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - framebuffer->attachment_count = pCreateInfo->attachmentCount; - framebuffer->width = pCreateInfo->width; - framebuffer->height = pCreateInfo->height; - framebuffer->layers = pCreateInfo->layers; - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - VkImageView _iview = pCreateInfo->pAttachments[i]; - struct tu_image_view *iview = tu_image_view_from_handle(_iview); - framebuffer->attachments[i].attachment = iview; - - framebuffer->width = MIN2(framebuffer->width, iview->extent.width); - framebuffer->height = MIN2(framebuffer->height, iview->extent.height); - framebuffer->layers = - MIN2(framebuffer->layers, tu_surface_max_layer_count(iview)); - } - - *pFramebuffer = tu_framebuffer_to_handle(framebuffer); - return VK_SUCCESS; -} - -void -tu_DestroyFramebuffer(VkDevice _device, - VkFramebuffer _fb, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_framebuffer, fb, _fb); - - if (!fb) - return; - vk_free2(&device->alloc, pAllocator, fb); -} - -static void -tu_init_sampler(struct tu_device *device, - struct tu_sampler *sampler, - const VkSamplerCreateInfo *pCreateInfo) -{ -} - -VkResult -tu_CreateSampler(VkDevice _device, - const VkSamplerCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSampler *pSampler) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_sampler *sampler; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO); - - sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!sampler) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - tu_init_sampler(device, sampler, pCreateInfo); - *pSampler = tu_sampler_to_handle(sampler); - - return VK_SUCCESS; -} - -void -tu_DestroySampler(VkDevice _device, - VkSampler _sampler, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_sampler, sampler, _sampler); - - if (!sampler) - return; - vk_free2(&device->alloc, pAllocator, sampler); -} - -/* vk_icd.h does not declare this function, so we declare it here to - * suppress Wmissing-prototypes. - */ -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion); - -PUBLIC VKAPI_ATTR VkResult VKAPI_CALL -vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion) -{ - /* For the full details on loader interface versioning, see - * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>. - * What follows is a condensed summary, to help you navigate the large and - * confusing official doc. - * - * - Loader interface v0 is incompatible with later versions. We don't - * support it. 
- * - * - In loader interface v1: - * - The first ICD entrypoint called by the loader is - * vk_icdGetInstanceProcAddr(). The ICD must statically expose this - * entrypoint. - * - The ICD must statically expose no other Vulkan symbol unless it - * is linked with -Bsymbolic. - * - Each dispatchable Vulkan handle created by the ICD must be - * a pointer to a struct whose first member is VK_LOADER_DATA. The - * ICD must initialize VK_LOADER_DATA.loadMagic to - * ICD_LOADER_MAGIC. - * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and - * vkDestroySurfaceKHR(). The ICD must be capable of working with - * such loader-managed surfaces. - * - * - Loader interface v2 differs from v1 in: - * - The first ICD entrypoint called by the loader is - * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must - * statically expose this entrypoint. - * - * - Loader interface v3 differs from v2 in: - * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(), - * vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR, - * because the loader no longer does so. - */ - *pSupportedVersion = MIN2(*pSupportedVersion, 3u); - return VK_SUCCESS; -} - -VkResult -tu_GetMemoryFdKHR(VkDevice _device, - const VkMemoryGetFdInfoKHR *pGetFdInfo, - int *pFd) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory); - - assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR); - - /* At the moment, we support only the below handle types. */ - assert(pGetFdInfo->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT || - pGetFdInfo->handleType == - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); - - int prime_fd = tu_bo_export_dmabuf(device, &memory->bo); - if (prime_fd < 0) - return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); - - *pFd = prime_fd; - return VK_SUCCESS; -} - -VkResult -tu_GetMemoryFdPropertiesKHR(VkDevice _device, - VkExternalMemoryHandleTypeFlagBits handleType, - int fd, - VkMemoryFdPropertiesKHR *pMemoryFdProperties) -{ - assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT); - pMemoryFdProperties->memoryTypeBits = 1; - return VK_SUCCESS; -} - -void -tu_GetPhysicalDeviceExternalSemaphoreProperties( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo, - VkExternalSemaphoreProperties *pExternalSemaphoreProperties) -{ - pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0; - pExternalSemaphoreProperties->compatibleHandleTypes = 0; - pExternalSemaphoreProperties->externalSemaphoreFeatures = 0; -} - -void -tu_GetPhysicalDeviceExternalFenceProperties( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo, - VkExternalFenceProperties *pExternalFenceProperties) -{ - pExternalFenceProperties->exportFromImportedHandleTypes = 0; - pExternalFenceProperties->compatibleHandleTypes = 0; - pExternalFenceProperties->externalFenceFeatures = 0; -} - -VkResult -tu_CreateDebugReportCallbackEXT( - VkInstance _instance, - const VkDebugReportCallbackCreateInfoEXT *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkDebugReportCallbackEXT *pCallback) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - return vk_create_debug_report_callback(&instance->debug_report_callbacks, - pCreateInfo, pAllocator, - &instance->alloc, pCallback); -} - -void -tu_DestroyDebugReportCallbackEXT(VkInstance _instance, - VkDebugReportCallbackEXT _callback, - const VkAllocationCallbacks *pAllocator) -{ - 
TU_FROM_HANDLE(tu_instance, instance, _instance); - vk_destroy_debug_report_callback(&instance->debug_report_callbacks, - _callback, pAllocator, &instance->alloc); -} - -void -tu_DebugReportMessageEXT(VkInstance _instance, - VkDebugReportFlagsEXT flags, - VkDebugReportObjectTypeEXT objectType, - uint64_t object, - size_t location, - int32_t messageCode, - const char *pLayerPrefix, - const char *pMessage) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - vk_debug_report(&instance->debug_report_callbacks, flags, objectType, - object, location, messageCode, pLayerPrefix, pMessage); -} - -void -tu_GetDeviceGroupPeerMemoryFeatures( - VkDevice device, - uint32_t heapIndex, - uint32_t localDeviceIndex, - uint32_t remoteDeviceIndex, - VkPeerMemoryFeatureFlags *pPeerMemoryFeatures) -{ - assert(localDeviceIndex == remoteDeviceIndex); - - *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | - VK_PEER_MEMORY_FEATURE_COPY_DST_BIT | - VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | - VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_drm.c b/lib/mesa/src/freedreno/vulkan/tu_drm.c deleted file mode 100644 index 9b2e6f788..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_drm.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright © 2018 Google, Inc. - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include <errno.h> -#include <fcntl.h> -#include <stdint.h> -#include <sys/ioctl.h> -#include <xf86drm.h> - -#include "drm-uapi/msm_drm.h" - -static int -tu_drm_get_param(const struct tu_physical_device *dev, - uint32_t param, - uint64_t *value) -{ - /* Technically this requires a pipe, but the kernel only supports one pipe - * anyway at the time of writing and most of these are clearly pipe - * independent. 
*/ - struct drm_msm_param req = { - .pipe = MSM_PIPE_3D0, - .param = param, - }; - - int ret = drmCommandWriteRead(dev->local_fd, DRM_MSM_GET_PARAM, &req, - sizeof(req)); - if (ret) - return ret; - - *value = req.value; - - return 0; -} - -int -tu_drm_get_gpu_id(const struct tu_physical_device *dev, uint32_t *id) -{ - uint64_t value; - int ret = tu_drm_get_param(dev, MSM_PARAM_GPU_ID, &value); - if (ret) - return ret; - - *id = value; - return 0; -} - -int -tu_drm_get_gmem_size(const struct tu_physical_device *dev, uint32_t *size) -{ - uint64_t value; - int ret = tu_drm_get_param(dev, MSM_PARAM_GMEM_SIZE, &value); - if (ret) - return ret; - - *size = value; - return 0; -} - -int -tu_drm_submitqueue_new(const struct tu_device *dev, - int priority, - uint32_t *queue_id) -{ - struct drm_msm_submitqueue req = { - .flags = 0, - .prio = priority, - }; - - int ret = drmCommandWriteRead(dev->physical_device->local_fd, - DRM_MSM_SUBMITQUEUE_NEW, &req, sizeof(req)); - if (ret) - return ret; - - *queue_id = req.id; - return 0; -} - -void -tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id) -{ - drmCommandWrite(dev->physical_device->local_fd, DRM_MSM_SUBMITQUEUE_CLOSE, - &queue_id, sizeof(uint32_t)); -} - -/** - * Return gem handle on success. Return 0 on failure. - */ -uint32_t -tu_gem_new(const struct tu_device *dev, uint64_t size, uint32_t flags) -{ - struct drm_msm_gem_new req = { - .size = size, - .flags = flags, - }; - - int ret = drmCommandWriteRead(dev->physical_device->local_fd, - DRM_MSM_GEM_NEW, &req, sizeof(req)); - if (ret) - return 0; - - return req.handle; -} - -uint32_t -tu_gem_import_dmabuf(const struct tu_device *dev, int prime_fd, uint64_t size) -{ - /* lseek() to get the real size */ - off_t real_size = lseek(prime_fd, 0, SEEK_END); - lseek(prime_fd, 0, SEEK_SET); - if (real_size < 0 || (uint64_t) real_size < size) - return 0; - - uint32_t gem_handle; - int ret = drmPrimeFDToHandle(dev->physical_device->local_fd, prime_fd, - &gem_handle); - if (ret) - return 0; - - return gem_handle; -} - -int -tu_gem_export_dmabuf(const struct tu_device *dev, uint32_t gem_handle) -{ - int prime_fd; - int ret = drmPrimeHandleToFD(dev->physical_device->local_fd, gem_handle, - DRM_CLOEXEC, &prime_fd); - - return ret == 0 ? prime_fd : -1; -} - -void -tu_gem_close(const struct tu_device *dev, uint32_t gem_handle) -{ - struct drm_gem_close req = { - .handle = gem_handle, - }; - - drmIoctl(dev->physical_device->local_fd, DRM_IOCTL_GEM_CLOSE, &req); -} - -/** Return UINT64_MAX on error. */ -static uint64_t -tu_gem_info(const struct tu_device *dev, uint32_t gem_handle, uint32_t info) -{ - struct drm_msm_gem_info req = { - .handle = gem_handle, - .info = info, - }; - - int ret = drmCommandWriteRead(dev->physical_device->local_fd, - DRM_MSM_GEM_INFO, &req, sizeof(req)); - if (ret == -1) - return UINT64_MAX; - - return req.value; -} - -/** Return UINT64_MAX on error. */ -uint64_t -tu_gem_info_offset(const struct tu_device *dev, uint32_t gem_handle) -{ - return tu_gem_info(dev, gem_handle, MSM_INFO_GET_OFFSET); -} - -/** Return UINT64_MAX on error. */ -uint64_t -tu_gem_info_iova(const struct tu_device *dev, uint32_t gem_handle) -{ - return tu_gem_info(dev, gem_handle, MSM_INFO_GET_IOVA); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_drm.h b/lib/mesa/src/freedreno/vulkan/tu_drm.h deleted file mode 100644 index c37659535..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_drm.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright © 2016 Red Hat. 
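Taken together, the GEM helpers in the tu_drm.c hunk above are the primitives that the higher-level tu_bo code (tu_bo_init_new() and tu_bo_map(), which live outside this hunk) builds on: allocate a GEM buffer, query its mmap offset, then map it through the DRM fd. The sketch below is illustrative only, with error handling trimmed; the function name is hypothetical and MSM_BO_WC is just one plausible allocation flag from msm_drm.h.

/* Illustrative only; needs <sys/mman.h>.  Approximates how the tu_bo helpers
 * are expected to combine tu_gem_new(), tu_gem_info_offset() and mmap().
 */
static void *
example_alloc_and_map(struct tu_device *dev, uint64_t size)
{
   uint32_t handle = tu_gem_new(dev, size, MSM_BO_WC);
   if (!handle)
      return NULL;                       /* tu_gem_new() returns 0 on failure */

   uint64_t offset = tu_gem_info_offset(dev, handle);
   if (offset == UINT64_MAX) {           /* UINT64_MAX signals an ioctl error */
      tu_gem_close(dev, handle);
      return NULL;
   }

   /* msm exposes BO mappings through the DRM fd at the queried offset */
   void *map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    dev->physical_device->local_fd, offset);
   return map == MAP_FAILED ? NULL : map;
}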
- * Copyright © 2016 Bas Nieuwenhuizen - * SPDX-License-Identifier: MIT - * - * based in part on anv driver which is: - * Copyright © 2015 Intel Corporation - */ - -#ifndef TU_DRM_H -#define TU_DRM_H - -#include "tu_common.h" - -/* Keep tu_syncobj until porting to common code for kgsl too */ -#ifdef TU_USE_KGSL -struct tu_syncobj; -/* for TU_FROM_HANDLE with both VkFence and VkSemaphore: */ -#define tu_syncobj_from_handle(x) ((struct tu_syncobj*) (uintptr_t) (x)) -#endif - -struct tu_u_trace_syncobj; - -enum tu_bo_alloc_flags -{ - TU_BO_ALLOC_NO_FLAGS = 0, - TU_BO_ALLOC_ALLOW_DUMP = 1 << 0, - TU_BO_ALLOC_GPU_READ_ONLY = 1 << 1, - TU_BO_ALLOC_REPLAYABLE = 1 << 2, -}; - -/* Define tu_timeline_sync type based on drm syncobj for a point type - * for vk_sync_timeline, and the logic to handle is mostly copied from - * anv_bo_sync since it seems it can be used by similar way to anv. - */ -enum tu_timeline_sync_state { - /** Indicates that this is a new (or newly reset fence) */ - TU_TIMELINE_SYNC_STATE_RESET, - - /** Indicates that this fence has been submitted to the GPU but is still - * (as far as we know) in use by the GPU. - */ - TU_TIMELINE_SYNC_STATE_SUBMITTED, - - TU_TIMELINE_SYNC_STATE_SIGNALED, -}; - -struct tu_bo -{ - uint32_t gem_handle; - uint64_t size; - uint64_t iova; - void *map; - const char *name; /* pointer to device->bo_sizes's entry's name */ - int32_t refcnt; - -#ifndef TU_USE_KGSL - uint32_t bo_list_idx; -#endif - - bool implicit_sync : 1; -}; - -struct tu_timeline_sync { - struct vk_sync base; - - enum tu_timeline_sync_state state; - uint32_t syncobj; -}; - -VkResult -tu_bo_init_new_explicit_iova(struct tu_device *dev, - struct tu_bo **out_bo, - uint64_t size, - uint64_t client_iova, - enum tu_bo_alloc_flags flags, const char *name); - -static inline VkResult -tu_bo_init_new(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size, - enum tu_bo_alloc_flags flags, const char *name) -{ - return tu_bo_init_new_explicit_iova(dev, out_bo, size, 0, flags, name); -} - -VkResult -tu_bo_init_dmabuf(struct tu_device *dev, - struct tu_bo **bo, - uint64_t size, - int fd); - -int -tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo); - -void -tu_bo_finish(struct tu_device *dev, struct tu_bo *bo); - -VkResult -tu_bo_map(struct tu_device *dev, struct tu_bo *bo); - -static inline struct tu_bo * -tu_bo_get_ref(struct tu_bo *bo) -{ - p_atomic_inc(&bo->refcnt); - return bo; -} - -#ifdef TU_USE_KGSL -VkResult -tu_enumerate_devices(struct vk_instance *vk_instance); -#else -VkResult -tu_physical_device_try_create(struct vk_instance *vk_instance, - struct _drmDevice *drm_device, - struct vk_physical_device **out); -#endif - -int -tu_device_get_gpu_timestamp(struct tu_device *dev, - uint64_t *ts); - -int -tu_device_get_suspend_count(struct tu_device *dev, - uint64_t *suspend_count); - -VkResult -tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj); - -VkResult -tu_device_check_status(struct vk_device *vk_device); - -int -tu_drm_submitqueue_new(const struct tu_device *dev, - int priority, - uint32_t *queue_id); - -void -tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id); - -VkResult -tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit); - -#endif /* TU_DRM_H */ diff --git a/lib/mesa/src/freedreno/vulkan/tu_dynamic_rendering.c b/lib/mesa/src/freedreno/vulkan/tu_dynamic_rendering.c deleted file mode 100644 index b74cebd05..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_dynamic_rendering.c +++ /dev/null @@ -1,220 
+0,0 @@ -/* - * Copyright © 2022 Valve Corporation - * SPDX-License-Identifier: MIT - */ - -/* When using dynamic rendering with the suspend/resume functionality, we - * sometimes need to merge together multiple suspended render passes - * dynamically at submit time. This involves combining all the saved-up IBs, - * emitting the rendering commands usually emitted by - * CmdEndRenderPass()/CmdEndRendering(), and inserting them in between the - * user command buffers. This gets tricky, because the same command buffer can - * be submitted multiple times, each time with a different other set of - * command buffers, and with VK_COMMAND_BUFFER_SIMULTANEOUS_USE_BIT, this can - * happen before the previous submission of the same command buffer has - * finished. At some point we have to free these commands and the BOs they are - * contained in, and we can't do that when resubmitting the last command - * buffer in the sequence because it may still be in use. This means we have - * to make the commands owned by the device and roll our own memory tracking. - */ - -#include "tu_dynamic_rendering.h" - -#include "tu_cmd_buffer.h" -#include "tu_cs.h" - -struct dynamic_rendering_entry { - struct tu_cmd_buffer *cmd_buffer; - uint32_t fence; /* The fence value when cmd_buffer becomes available */ -}; - -static VkResult -get_cmd_buffer(struct tu_device *dev, struct tu_cmd_buffer **cmd_buffer_out) -{ - struct tu6_global *global = dev->global_bo->map; - - /* Note: because QueueSubmit is serialized, we don't need any locks here. - */ - uint32_t fence = global->dynamic_rendering_fence; - - /* Go through the entries and return the finished ones to the pool, - * shrinking the array of pending entries. - */ - struct dynamic_rendering_entry *new_entry = - util_dynarray_begin(&dev->dynamic_rendering_pending); - uint32_t entries = 0; - util_dynarray_foreach(&dev->dynamic_rendering_pending, - struct dynamic_rendering_entry, entry) { - if (entry->fence <= fence) { - VkCommandBuffer vk_buf = tu_cmd_buffer_to_handle(entry->cmd_buffer); - vk_common_FreeCommandBuffers(tu_device_to_handle(dev), - dev->dynamic_rendering_pool, 1, &vk_buf); - } else { - *new_entry = *entry; - new_entry++; - entries++; - } - } - UNUSED void *dummy = - util_dynarray_resize(&dev->dynamic_rendering_pending, - struct dynamic_rendering_entry, entries); - - VkCommandBuffer vk_buf; - const VkCommandBufferAllocateInfo info = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .pNext = NULL, - .commandPool = dev->dynamic_rendering_pool, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = 1, - }; - VkResult result = - vk_common_AllocateCommandBuffers(tu_device_to_handle(dev), &info, &vk_buf); - if (result != VK_SUCCESS) - return result; - - TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, vk_buf); - - struct dynamic_rendering_entry entry = { - .cmd_buffer = cmd_buffer, - .fence = ++dev->dynamic_rendering_fence, - }; - - util_dynarray_append(&dev->dynamic_rendering_pending, - struct dynamic_rendering_entry, entry); - *cmd_buffer_out = cmd_buffer; - - return VK_SUCCESS; -} - -VkResult -tu_init_dynamic_rendering(struct tu_device *dev) -{ - util_dynarray_init(&dev->dynamic_rendering_pending, NULL); - dev->dynamic_rendering_fence = 0; - - return vk_common_CreateCommandPool(tu_device_to_handle(dev), - &(VkCommandPoolCreateInfo) { - .pNext = NULL, - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = 0, - .queueFamilyIndex = 0, - }, &dev->vk.alloc, - &dev->dynamic_rendering_pool); -} - -void -tu_destroy_dynamic_rendering(struct 
tu_device *dev) -{ - vk_common_DestroyCommandPool(tu_device_to_handle(dev), - dev->dynamic_rendering_pool, - &dev->vk.alloc); - util_dynarray_fini(&dev->dynamic_rendering_pending); -} - -VkResult -tu_insert_dynamic_cmdbufs(struct tu_device *dev, - struct tu_cmd_buffer ***cmds_ptr, - uint32_t *size) -{ - struct tu_cmd_buffer **old_cmds = *cmds_ptr; - - bool has_dynamic = false; - for (unsigned i = 0; i < *size; i++) { - if (old_cmds[i]->state.suspend_resume != SR_NONE) { - has_dynamic = true; - break; - } - } - - if (!has_dynamic) - return VK_SUCCESS; - - struct util_dynarray cmds = {0}; - struct tu_cmd_buffer *cmd_buffer = NULL; - - for (unsigned i = 0; i < *size; i++) { - switch (old_cmds[i]->state.suspend_resume) { - case SR_NONE: - case SR_IN_CHAIN: - case SR_IN_PRE_CHAIN: - break; - - case SR_AFTER_PRE_CHAIN: - case SR_IN_CHAIN_AFTER_PRE_CHAIN: - tu_append_pre_chain(cmd_buffer, old_cmds[i]); - - if (!(old_cmds[i]->usage_flags & - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) { - u_trace_disable_event_range(old_cmds[i]->pre_chain.trace_renderpass_start, - old_cmds[i]->pre_chain.trace_renderpass_end); - } - - tu_cmd_render(cmd_buffer); - - tu_cs_emit_pkt7(&cmd_buffer->cs, CP_MEM_WRITE, 3); - tu_cs_emit_qw(&cmd_buffer->cs, - global_iova(cmd_buffer, dynamic_rendering_fence)); - tu_cs_emit(&cmd_buffer->cs, dev->dynamic_rendering_fence); - - tu_EndCommandBuffer(tu_cmd_buffer_to_handle(cmd_buffer)); - util_dynarray_append(&cmds, struct tu_cmd_buffer *, cmd_buffer); - cmd_buffer = NULL; - break; - } - - util_dynarray_append(&cmds, struct tu_cmd_buffer *, old_cmds[i]); - - switch (old_cmds[i]->state.suspend_resume) { - case SR_NONE: - case SR_AFTER_PRE_CHAIN: - break; - case SR_IN_CHAIN: - case SR_IN_CHAIN_AFTER_PRE_CHAIN: { - assert(!cmd_buffer); - VkResult result = get_cmd_buffer(dev, &cmd_buffer); - if (result != VK_SUCCESS) - return result; - - tu_cmd_buffer_begin(cmd_buffer, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT); - - /* Setup the render pass using the first command buffer involved in - * the chain, so that it will look like we're inside a render pass - * for tu_cmd_render(). - */ - tu_restore_suspended_pass(cmd_buffer, old_cmds[i]); - FALLTHROUGH; - } - case SR_IN_PRE_CHAIN: - assert(cmd_buffer); - - tu_append_pre_post_chain(cmd_buffer, old_cmds[i]); - - if (old_cmds[i]->usage_flags & - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) { - u_trace_disable_event_range(old_cmds[i]->trace_renderpass_start, - old_cmds[i]->trace_renderpass_end); - } - - /* When the command buffer is finally recorded, we need its state - * to be the state of the command buffer before it. We need this - * because we skip tu6_emit_hw(). - */ - cmd_buffer->state.ccu_state = old_cmds[i]->state.ccu_state; - break; - } - } - - struct tu_cmd_buffer **new_cmds = - vk_alloc(&dev->vk.alloc, cmds.size, alignof(struct tu_cmd_buffer *), - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!new_cmds) - return VK_ERROR_OUT_OF_HOST_MEMORY; - memcpy(new_cmds, cmds.data, cmds.size); - *cmds_ptr = new_cmds; - *size = util_dynarray_num_elements(&cmds, struct tu_cmd_buffer *); - util_dynarray_fini(&cmds); - - return VK_SUCCESS; -} - diff --git a/lib/mesa/src/freedreno/vulkan/tu_formats.c b/lib/mesa/src/freedreno/vulkan/tu_formats.c deleted file mode 100644 index 537b59d25..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_formats.c +++ /dev/null @@ -1,998 +0,0 @@ - -/* - * Copyright © 2016 Red Hat. 
- * Copyright © 2016 Bas Nieuwenhuizen - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include "registers/adreno_common.xml.h" -#include "registers/a6xx.xml.h" - -#include "util/format_r11g11b10f.h" -#include "util/format_srgb.h" -#include "util/u_half.h" -#include "vk_format.h" -#include "vk_util.h" - -/** - * Declare a format table. A format table is an array of tu_native_format. - * It can map a consecutive range of VkFormat to the corresponding - * tu_native_format. - * - * TU_FORMAT_TABLE_FIRST and TU_FORMAT_TABLE_LAST must already be defined and - * have the values of the first and last VkFormat of the array respectively. - */ -#define TU_FORMAT_TABLE(var) \ - static const VkFormat var##_first = TU_FORMAT_TABLE_FIRST; \ - static const VkFormat var##_last = TU_FORMAT_TABLE_LAST; \ - static const struct tu_native_format var[TU_FORMAT_TABLE_LAST - TU_FORMAT_TABLE_FIRST + 1] -#undef TU_FORMAT_TABLE_FIRST -#undef TU_FORMAT_TABLE_LAST - -#define VFMT6_x -1 -#define TFMT6_x -1 -#define RB6_x -1 - -#define TU6_FMT(vkfmt, vtxfmt, texfmt, rbfmt, swapfmt, valid) \ - [VK_FORMAT_##vkfmt - TU_FORMAT_TABLE_FIRST] = { \ - .vtx = VFMT6_##vtxfmt, \ - .tex = TFMT6_##texfmt, \ - .rb = RB6_##rbfmt, \ - .swap = swapfmt, \ - .present = valid, \ - } - -/** - * fmt/alias/swap are derived from VkFormat mechanically (and might not even - * exist). It is the macro of choice that decides whether a VkFormat is - * supported and how. 
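Concretely, each helper macro below fills in one tu_native_format initializer at index VK_FORMAT_x - TU_FORMAT_TABLE_FIRST, and the choice of helper (VTC, xTC, Vxx, ...) encodes which of the vertex/texture/render-target columns are valid. As a rough, hand-expanded illustration (not additional driver code), the R8_UNORM entry written as TU6_VTC(R8_UNORM, 8_UNORM, R8_UNORM, WZYX) in the table below behaves like:

/* Hand-expanded illustration of one TU6_VTC() entry; the enum names are the
 * ones the macro pastes together, the array itself exists only for this sketch.
 */
static const struct tu_native_format example_entry_expansion[] = {
   [VK_FORMAT_R8_UNORM - TU_FORMAT_TABLE_FIRST] = {
      .vtx     = VFMT6_8_UNORM,   /* valid as a vertex fetch format */
      .tex     = TFMT6_8_UNORM,   /* valid as a texture format */
      .rb      = RB6_R8_UNORM,    /* valid as a render target format */
      .swap    = WZYX,
      .present = true,            /* TU6_VTC() marks the entry as supported */
   },
};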
- */ -#define TU6_VTC(vk, fmt, alias, swap) TU6_FMT(vk, fmt, fmt, alias, swap, true) -#define TU6_xTC(vk, fmt, alias, swap) TU6_FMT(vk, x, fmt, alias, swap, true) -#define TU6_VTx(vk, fmt, alias, swap) TU6_FMT(vk, fmt, fmt, x, swap, true) -#define TU6_Vxx(vk, fmt, alias, swap) TU6_FMT(vk, fmt, x, x, swap, true) -#define TU6_xTx(vk, fmt, alias, swap) TU6_FMT(vk, x, fmt, x, swap, true) -#define TU6_xxx(vk, fmt, alias, swap) TU6_FMT(vk, x, x, x, WZYX, false) - -#define TU_FORMAT_TABLE_FIRST VK_FORMAT_UNDEFINED -#define TU_FORMAT_TABLE_LAST VK_FORMAT_ASTC_12x12_SRGB_BLOCK -TU_FORMAT_TABLE(tu6_format_table0) = { - TU6_xxx(UNDEFINED, x, x, x), /* 0 */ - - /* 8-bit packed */ - TU6_xxx(R4G4_UNORM_PACK8, 4_4_UNORM, R4G4_UNORM, WZXY), /* 1 */ - - /* 16-bit packed */ - TU6_xTC(R4G4B4A4_UNORM_PACK16, 4_4_4_4_UNORM, R4G4B4A4_UNORM, XYZW), /* 2 */ - TU6_xTC(B4G4R4A4_UNORM_PACK16, 4_4_4_4_UNORM, R4G4B4A4_UNORM, ZYXW), /* 3 */ - TU6_xTC(R5G6B5_UNORM_PACK16, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), /* 4 */ - TU6_xTC(B5G6R5_UNORM_PACK16, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), /* 5 */ - TU6_xxx(R5G5B5A1_UNORM_PACK16, 1_5_5_5_UNORM, A1R5G5B5_UNORM, XYZW), /* 6 */ - TU6_xxx(B5G5R5A1_UNORM_PACK16, 1_5_5_5_UNORM, A1R5G5B5_UNORM, XYZW), /* 7 */ - TU6_xTC(A1R5G5B5_UNORM_PACK16, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), /* 8 */ - - /* 8-bit R */ - TU6_VTC(R8_UNORM, 8_UNORM, R8_UNORM, WZYX), /* 9 */ - TU6_VTC(R8_SNORM, 8_SNORM, R8_SNORM, WZYX), /* 10 */ - TU6_Vxx(R8_USCALED, 8_UINT, R8_UINT, WZYX), /* 11 */ - TU6_Vxx(R8_SSCALED, 8_SINT, R8_SINT, WZYX), /* 12 */ - TU6_VTC(R8_UINT, 8_UINT, R8_UINT, WZYX), /* 13 */ - TU6_VTC(R8_SINT, 8_SINT, R8_SINT, WZYX), /* 14 */ - TU6_xTC(R8_SRGB, 8_UNORM, R8_UNORM, WZYX), /* 15 */ - - /* 16-bit RG */ - TU6_VTC(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), /* 16 */ - TU6_VTC(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), /* 17 */ - TU6_Vxx(R8G8_USCALED, 8_8_UINT, R8G8_UINT, WZYX), /* 18 */ - TU6_Vxx(R8G8_SSCALED, 8_8_SINT, R8G8_SINT, WZYX), /* 19 */ - TU6_VTC(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX), /* 20 */ - TU6_VTC(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX), /* 21 */ - TU6_xTC(R8G8_SRGB, 8_8_UNORM, R8G8_UNORM, WZYX), /* 22 */ - - /* 24-bit RGB */ - TU6_Vxx(R8G8B8_UNORM, 8_8_8_UNORM, R8G8B8_UNORM, WZYX), /* 23 */ - TU6_Vxx(R8G8B8_SNORM, 8_8_8_SNORM, R8G8B8_SNORM, WZYX), /* 24 */ - TU6_Vxx(R8G8B8_USCALED, 8_8_8_UINT, R8G8B8_UINT, WZYX), /* 25 */ - TU6_Vxx(R8G8B8_SSCALED, 8_8_8_SINT, R8G8B8_SINT, WZYX), /* 26 */ - TU6_Vxx(R8G8B8_UINT, 8_8_8_UINT, R8G8B8_UINT, WZYX), /* 27 */ - TU6_Vxx(R8G8B8_SINT, 8_8_8_SINT, R8G8B8_SINT, WZYX), /* 28 */ - TU6_xxx(R8G8B8_SRGB, 8_8_8_UNORM, R8G8B8_UNORM, WZYX), /* 29 */ - - /* 24-bit BGR */ - TU6_Vxx(B8G8R8_UNORM, 8_8_8_UNORM, R8G8B8_UNORM, WXYZ), /* 30 */ - TU6_Vxx(B8G8R8_SNORM, 8_8_8_SNORM, R8G8B8_SNORM, WXYZ), /* 31 */ - TU6_Vxx(B8G8R8_USCALED, 8_8_8_UINT, R8G8B8_UINT, WXYZ), /* 32 */ - TU6_Vxx(B8G8R8_SSCALED, 8_8_8_SINT, R8G8B8_SINT, WXYZ), /* 33 */ - TU6_Vxx(B8G8R8_UINT, 8_8_8_UINT, R8G8B8_UINT, WXYZ), /* 34 */ - TU6_Vxx(B8G8R8_SINT, 8_8_8_SINT, R8G8B8_SINT, WXYZ), /* 35 */ - TU6_xxx(B8G8R8_SRGB, 8_8_8_UNORM, R8G8B8_UNORM, WXYZ), /* 36 */ - - /* 32-bit RGBA */ - TU6_VTC(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 37 */ - TU6_VTC(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), /* 38 */ - TU6_Vxx(R8G8B8A8_USCALED, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 39 */ - TU6_Vxx(R8G8B8A8_SSCALED, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), /* 40 */ - TU6_VTC(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 41 */ - TU6_VTC(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), 
/* 42 */ - TU6_xTC(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 43 */ - - /* 32-bit BGRA */ - TU6_VTC(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), /* 44 */ - TU6_VTC(B8G8R8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WXYZ), /* 45 */ - TU6_Vxx(B8G8R8A8_USCALED, 8_8_8_8_UINT, R8G8B8A8_UINT, WXYZ), /* 46 */ - TU6_Vxx(B8G8R8A8_SSCALED, 8_8_8_8_SINT, R8G8B8A8_SINT, WXYZ), /* 47 */ - TU6_VTC(B8G8R8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WXYZ), /* 48 */ - TU6_VTC(B8G8R8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WXYZ), /* 49 */ - TU6_xTC(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), /* 50 */ - - /* 32-bit packed */ - TU6_VTC(A8B8G8R8_UNORM_PACK32, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 51 */ - TU6_VTC(A8B8G8R8_SNORM_PACK32, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), /* 52 */ - TU6_Vxx(A8B8G8R8_USCALED_PACK32, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 53 */ - TU6_Vxx(A8B8G8R8_SSCALED_PACK32, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), /* 54 */ - TU6_VTC(A8B8G8R8_UINT_PACK32, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), /* 55 */ - TU6_VTC(A8B8G8R8_SINT_PACK32, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), /* 56 */ - TU6_xTC(A8B8G8R8_SRGB_PACK32, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), /* 57 */ - TU6_VTC(A2R10G10B10_UNORM_PACK32, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), /* 58 */ - TU6_Vxx(A2R10G10B10_SNORM_PACK32, 10_10_10_2_SNORM, R10G10B10A2_SNORM, WXYZ), /* 59 */ - TU6_Vxx(A2R10G10B10_USCALED_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), /* 60 */ - TU6_Vxx(A2R10G10B10_SSCALED_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WXYZ), /* 61 */ - TU6_VTC(A2R10G10B10_UINT_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), /* 62 */ - TU6_Vxx(A2R10G10B10_SINT_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WXYZ), /* 63 */ - TU6_VTC(A2B10G10R10_UNORM_PACK32, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX), /* 64 */ - TU6_Vxx(A2B10G10R10_SNORM_PACK32, 10_10_10_2_SNORM, R10G10B10A2_SNORM, WZYX), /* 65 */ - TU6_Vxx(A2B10G10R10_USCALED_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), /* 66 */ - TU6_Vxx(A2B10G10R10_SSCALED_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WZYX), /* 67 */ - TU6_VTC(A2B10G10R10_UINT_PACK32, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), /* 68 */ - TU6_Vxx(A2B10G10R10_SINT_PACK32, 10_10_10_2_SINT, R10G10B10A2_SINT, WZYX), /* 69 */ - - /* 16-bit R */ - TU6_VTC(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), /* 70 */ - TU6_VTC(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), /* 71 */ - TU6_Vxx(R16_USCALED, 16_UINT, R16_UINT, WZYX), /* 72 */ - TU6_Vxx(R16_SSCALED, 16_SINT, R16_SINT, WZYX), /* 73 */ - TU6_VTC(R16_UINT, 16_UINT, R16_UINT, WZYX), /* 74 */ - TU6_VTC(R16_SINT, 16_SINT, R16_SINT, WZYX), /* 75 */ - TU6_VTC(R16_SFLOAT, 16_FLOAT, R16_FLOAT, WZYX), /* 76 */ - - /* 32-bit RG */ - TU6_VTC(R16G16_UNORM, 16_16_UNORM, R16G16_UNORM, WZYX), /* 77 */ - TU6_VTC(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), /* 78 */ - TU6_VTx(R16G16_USCALED, 16_16_UINT, R16G16_UINT, WZYX), /* 79 */ - TU6_VTx(R16G16_SSCALED, 16_16_SINT, R16G16_SINT, WZYX), /* 80 */ - TU6_VTC(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), /* 81 */ - TU6_VTC(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), /* 82 */ - TU6_VTC(R16G16_SFLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), /* 83 */ - - /* 48-bit RGB */ - TU6_Vxx(R16G16B16_UNORM, 16_16_16_UNORM, R16G16B16_UNORM, WZYX), /* 84 */ - TU6_Vxx(R16G16B16_SNORM, 16_16_16_SNORM, R16G16B16_SNORM, WZYX), /* 85 */ - TU6_Vxx(R16G16B16_USCALED, 16_16_16_UINT, R16G16B16_UINT, WZYX), /* 86 */ - TU6_Vxx(R16G16B16_SSCALED, 16_16_16_SINT, R16G16B16_SINT, WZYX), /* 87 */ - TU6_Vxx(R16G16B16_UINT, 16_16_16_UINT, R16G16B16_UINT, WZYX), /* 88 */ - 
TU6_Vxx(R16G16B16_SINT, 16_16_16_SINT, R16G16B16_SINT, WZYX), /* 89 */ - TU6_Vxx(R16G16B16_SFLOAT, 16_16_16_FLOAT, R16G16B16_FLOAT, WZYX), /* 90 */ - - /* 64-bit RGBA */ - TU6_VTC(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), /* 91 */ - TU6_VTC(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), /* 92 */ - TU6_VTx(R16G16B16A16_USCALED, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), /* 93 */ - TU6_VTx(R16G16B16A16_SSCALED, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), /* 94 */ - TU6_VTC(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), /* 95 */ - TU6_VTC(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), /* 96 */ - TU6_VTC(R16G16B16A16_SFLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), /* 97 */ - - /* 32-bit R */ - TU6_VTC(R32_UINT, 32_UINT, R32_UINT, WZYX), /* 98 */ - TU6_VTC(R32_SINT, 32_SINT, R32_SINT, WZYX), /* 99 */ - TU6_VTC(R32_SFLOAT, 32_FLOAT, R32_FLOAT, WZYX), /* 100 */ - - /* 64-bit RG */ - TU6_VTC(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX), /* 101 */ - TU6_VTC(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX), /* 102 */ - TU6_VTC(R32G32_SFLOAT, 32_32_FLOAT, R32G32_FLOAT, WZYX), /* 103 */ - - /* 96-bit RGB */ - TU6_VTx(R32G32B32_UINT, 32_32_32_UINT, R32G32B32_UINT, WZYX), /* 104 */ - TU6_VTx(R32G32B32_SINT, 32_32_32_SINT, R32G32B32_SINT, WZYX), /* 105 */ - TU6_VTx(R32G32B32_SFLOAT, 32_32_32_FLOAT, R32G32B32_FLOAT, WZYX), /* 106 */ - - /* 128-bit RGBA */ - TU6_VTC(R32G32B32A32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), /* 107 */ - TU6_VTC(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), /* 108 */ - TU6_VTC(R32G32B32A32_SFLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), /* 109 */ - - /* 64-bit R */ - TU6_xxx(R64_UINT, 64_UINT, R64_UINT, WZYX), /* 110 */ - TU6_xxx(R64_SINT, 64_SINT, R64_SINT, WZYX), /* 111 */ - TU6_xxx(R64_SFLOAT, 64_FLOAT, R64_FLOAT, WZYX), /* 112 */ - - /* 128-bit RG */ - TU6_xxx(R64G64_UINT, 64_64_UINT, R64G64_UINT, WZYX), /* 113 */ - TU6_xxx(R64G64_SINT, 64_64_SINT, R64G64_SINT, WZYX), /* 114 */ - TU6_xxx(R64G64_SFLOAT, 64_64_FLOAT, R64G64_FLOAT, WZYX), /* 115 */ - - /* 192-bit RGB */ - TU6_xxx(R64G64B64_UINT, 64_64_64_UINT, R64G64B64_UINT, WZYX), /* 116 */ - TU6_xxx(R64G64B64_SINT, 64_64_64_SINT, R64G64B64_SINT, WZYX), /* 117 */ - TU6_xxx(R64G64B64_SFLOAT, 64_64_64_FLOAT, R64G64B64_FLOAT, WZYX), /* 118 */ - - /* 256-bit RGBA */ - TU6_xxx(R64G64B64A64_UINT, 64_64_64_64_UINT, R64G64B64A64_UINT, WZYX), /* 119 */ - TU6_xxx(R64G64B64A64_SINT, 64_64_64_64_SINT, R64G64B64A64_SINT, WZYX), /* 120 */ - TU6_xxx(R64G64B64A64_SFLOAT, 64_64_64_64_FLOAT, R64G64B64A64_FLOAT, WZYX), /* 121 */ - - /* 32-bit packed float */ - TU6_VTC(B10G11R11_UFLOAT_PACK32, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), /* 122 */ - TU6_xTx(E5B9G9R9_UFLOAT_PACK32, 9_9_9_E5_FLOAT, R9G9B9E5_FLOAT, WZYX), /* 123 */ - - /* depth/stencil */ - TU6_xTC(D16_UNORM, 16_UNORM, R16_UNORM, WZYX), /* 124 */ - TU6_xTC(X8_D24_UNORM_PACK32, X8Z24_UNORM, X8Z24_UNORM, WZYX), /* 125 */ - TU6_xTC(D32_SFLOAT, 32_FLOAT, R32_FLOAT, WZYX), /* 126 */ - TU6_xTC(S8_UINT, 8_UINT, R8_UNORM, WZYX), /* 127 */ - TU6_xxx(D16_UNORM_S8_UINT, X8Z16_UNORM, X8Z16_UNORM, WZYX), /* 128 */ - TU6_xTC(D24_UNORM_S8_UINT, X8Z24_UNORM, X8Z24_UNORM, WZYX), /* 129 */ - TU6_xTC(D32_SFLOAT_S8_UINT, 32_FLOAT, R32_FLOAT, WZYX), /* 130 */ - - /* compressed */ - TU6_xTx(BC1_RGB_UNORM_BLOCK, DXT1, DXT1, WZYX), /* 131 */ - TU6_xTx(BC1_RGB_SRGB_BLOCK, DXT1, DXT1, WZYX), /* 132 */ - TU6_xTx(BC1_RGBA_UNORM_BLOCK, DXT1, DXT1, WZYX), /* 133 */ - TU6_xTx(BC1_RGBA_SRGB_BLOCK, DXT1, DXT1, 
WZYX), /* 134 */ - TU6_xTx(BC2_UNORM_BLOCK, DXT3, DXT3, WZYX), /* 135 */ - TU6_xTx(BC2_SRGB_BLOCK, DXT3, DXT3, WZYX), /* 136 */ - TU6_xTx(BC3_UNORM_BLOCK, DXT5, DXT5, WZYX), /* 137 */ - TU6_xTx(BC3_SRGB_BLOCK, DXT5, DXT5, WZYX), /* 138 */ - TU6_xTx(BC4_UNORM_BLOCK, RGTC1_UNORM, RGTC1_UNORM, WZYX), /* 139 */ - TU6_xTx(BC4_SNORM_BLOCK, RGTC1_SNORM, RGTC1_SNORM, WZYX), /* 140 */ - TU6_xTx(BC5_UNORM_BLOCK, RGTC2_UNORM, RGTC2_UNORM, WZYX), /* 141 */ - TU6_xTx(BC5_SNORM_BLOCK, RGTC2_SNORM, RGTC2_SNORM, WZYX), /* 142 */ - TU6_xTx(BC6H_UFLOAT_BLOCK, BPTC_UFLOAT, BPTC_UFLOAT, WZYX), /* 143 */ - TU6_xTx(BC6H_SFLOAT_BLOCK, BPTC_FLOAT, BPTC_FLOAT, WZYX), /* 144 */ - TU6_xTx(BC7_UNORM_BLOCK, BPTC, BPTC, WZYX), /* 145 */ - TU6_xTx(BC7_SRGB_BLOCK, BPTC, BPTC, WZYX), /* 146 */ - TU6_xTx(ETC2_R8G8B8_UNORM_BLOCK, ETC2_RGB8, ETC2_RGB8, WZYX), /* 147 */ - TU6_xTx(ETC2_R8G8B8_SRGB_BLOCK, ETC2_RGB8, ETC2_RGB8, WZYX), /* 148 */ - TU6_xTx(ETC2_R8G8B8A1_UNORM_BLOCK, ETC2_RGB8A1, ETC2_RGB8A1, WZYX), /* 149 */ - TU6_xTx(ETC2_R8G8B8A1_SRGB_BLOCK, ETC2_RGB8A1, ETC2_RGB8A1, WZYX), /* 150 */ - TU6_xTx(ETC2_R8G8B8A8_UNORM_BLOCK, ETC2_RGBA8, ETC2_RGBA8, WZYX), /* 151 */ - TU6_xTx(ETC2_R8G8B8A8_SRGB_BLOCK, ETC2_RGBA8, ETC2_RGBA8, WZYX), /* 152 */ - TU6_xTx(EAC_R11_UNORM_BLOCK, ETC2_R11_UNORM, ETC2_R11_UNORM, WZYX), /* 153 */ - TU6_xTx(EAC_R11_SNORM_BLOCK, ETC2_R11_SNORM, ETC2_R11_SNORM, WZYX), /* 154 */ - TU6_xTx(EAC_R11G11_UNORM_BLOCK, ETC2_RG11_UNORM, ETC2_RG11_UNORM, WZYX), /* 155 */ - TU6_xTx(EAC_R11G11_SNORM_BLOCK, ETC2_RG11_SNORM, ETC2_RG11_SNORM, WZYX), /* 156 */ - TU6_xTx(ASTC_4x4_UNORM_BLOCK, ASTC_4x4, ASTC_4x4, WZYX), /* 157 */ - TU6_xTx(ASTC_4x4_SRGB_BLOCK, ASTC_4x4, ASTC_4x4, WZYX), /* 158 */ - TU6_xTx(ASTC_5x4_UNORM_BLOCK, ASTC_5x4, ASTC_5x4, WZYX), /* 159 */ - TU6_xTx(ASTC_5x4_SRGB_BLOCK, ASTC_5x4, ASTC_5x4, WZYX), /* 160 */ - TU6_xTx(ASTC_5x5_UNORM_BLOCK, ASTC_5x5, ASTC_5x5, WZYX), /* 161 */ - TU6_xTx(ASTC_5x5_SRGB_BLOCK, ASTC_5x5, ASTC_5x5, WZYX), /* 162 */ - TU6_xTx(ASTC_6x5_UNORM_BLOCK, ASTC_6x5, ASTC_6x5, WZYX), /* 163 */ - TU6_xTx(ASTC_6x5_SRGB_BLOCK, ASTC_6x5, ASTC_6x5, WZYX), /* 164 */ - TU6_xTx(ASTC_6x6_UNORM_BLOCK, ASTC_6x6, ASTC_6x6, WZYX), /* 165 */ - TU6_xTx(ASTC_6x6_SRGB_BLOCK, ASTC_6x6, ASTC_6x6, WZYX), /* 166 */ - TU6_xTx(ASTC_8x5_UNORM_BLOCK, ASTC_8x5, ASTC_8x5, WZYX), /* 167 */ - TU6_xTx(ASTC_8x5_SRGB_BLOCK, ASTC_8x5, ASTC_8x5, WZYX), /* 168 */ - TU6_xTx(ASTC_8x6_UNORM_BLOCK, ASTC_8x6, ASTC_8x6, WZYX), /* 169 */ - TU6_xTx(ASTC_8x6_SRGB_BLOCK, ASTC_8x6, ASTC_8x6, WZYX), /* 170 */ - TU6_xTx(ASTC_8x8_UNORM_BLOCK, ASTC_8x8, ASTC_8x8, WZYX), /* 171 */ - TU6_xTx(ASTC_8x8_SRGB_BLOCK, ASTC_8x8, ASTC_8x8, WZYX), /* 172 */ - TU6_xTx(ASTC_10x5_UNORM_BLOCK, ASTC_10x5, ASTC_10x5, WZYX), /* 173 */ - TU6_xTx(ASTC_10x5_SRGB_BLOCK, ASTC_10x5, ASTC_10x5, WZYX), /* 174 */ - TU6_xTx(ASTC_10x6_UNORM_BLOCK, ASTC_10x6, ASTC_10x6, WZYX), /* 175 */ - TU6_xTx(ASTC_10x6_SRGB_BLOCK, ASTC_10x6, ASTC_10x6, WZYX), /* 176 */ - TU6_xTx(ASTC_10x8_UNORM_BLOCK, ASTC_10x8, ASTC_10x8, WZYX), /* 177 */ - TU6_xTx(ASTC_10x8_SRGB_BLOCK, ASTC_10x8, ASTC_10x8, WZYX), /* 178 */ - TU6_xTx(ASTC_10x10_UNORM_BLOCK, ASTC_10x10, ASTC_10x10, WZYX), /* 179 */ - TU6_xTx(ASTC_10x10_SRGB_BLOCK, ASTC_10x10, ASTC_10x10, WZYX), /* 180 */ - TU6_xTx(ASTC_12x10_UNORM_BLOCK, ASTC_12x10, ASTC_12x10, WZYX), /* 181 */ - TU6_xTx(ASTC_12x10_SRGB_BLOCK, ASTC_12x10, ASTC_12x10, WZYX), /* 182 */ - TU6_xTx(ASTC_12x12_UNORM_BLOCK, ASTC_12x12, ASTC_12x12, WZYX), /* 183 */ - TU6_xTx(ASTC_12x12_SRGB_BLOCK, ASTC_12x12, ASTC_12x12, WZYX), /* 184 */ -}; -#undef 
TU_FORMAT_TABLE_FIRST -#undef TU_FORMAT_TABLE_LAST - -const struct tu_native_format * -tu6_get_native_format(VkFormat format) -{ - const struct tu_native_format *fmt = NULL; - - if (format >= tu6_format_table0_first && format <= tu6_format_table0_last) - fmt = &tu6_format_table0[format - tu6_format_table0_first]; - - return (fmt && fmt->present) ? fmt : NULL; -} - -enum a6xx_2d_ifmt -tu6_rb_fmt_to_ifmt(enum a6xx_color_fmt fmt) -{ - switch (fmt) { - case RB6_A8_UNORM: - case RB6_R8_UNORM: - case RB6_R8_SNORM: - case RB6_R8G8_UNORM: - case RB6_R8G8_SNORM: - case RB6_R8G8B8A8_UNORM: - case RB6_R8G8B8_UNORM: - case RB6_R8G8B8A8_SNORM: - return R2D_UNORM8; - - case RB6_R32_UINT: - case RB6_R32_SINT: - case RB6_R32G32_UINT: - case RB6_R32G32_SINT: - case RB6_R32G32B32A32_UINT: - case RB6_R32G32B32A32_SINT: - return R2D_INT32; - - case RB6_R16_UINT: - case RB6_R16_SINT: - case RB6_R16G16_UINT: - case RB6_R16G16_SINT: - case RB6_R16G16B16A16_UINT: - case RB6_R16G16B16A16_SINT: - return R2D_INT16; - - case RB6_R8_UINT: - case RB6_R8_SINT: - case RB6_R8G8_UINT: - case RB6_R8G8_SINT: - case RB6_R8G8B8A8_UINT: - case RB6_R8G8B8A8_SINT: - return R2D_INT8; - - case RB6_R16_UNORM: - case RB6_R16_SNORM: - case RB6_R16G16_UNORM: - case RB6_R16G16_SNORM: - case RB6_R16G16B16A16_UNORM: - case RB6_R16G16B16A16_SNORM: - case RB6_R32_FLOAT: - case RB6_R32G32_FLOAT: - case RB6_R32G32B32A32_FLOAT: - return R2D_FLOAT32; - - case RB6_R16_FLOAT: - case RB6_R16G16_FLOAT: - case RB6_R16G16B16A16_FLOAT: - return R2D_FLOAT16; - - case RB6_R4G4B4A4_UNORM: - case RB6_R5G5B5A1_UNORM: - case RB6_R5G6B5_UNORM: - case RB6_R10G10B10A2_UNORM: - case RB6_R10G10B10A2_UINT: - case RB6_R11G11B10_FLOAT: - case RB6_X8Z24_UNORM: - // ??? - return 0; - default: - unreachable("bad format"); - return 0; - } -} - -static uint32_t -tu_pack_mask(int bits) -{ - assert(bits <= 32); - return (1ull << bits) - 1; -} - -static uint32_t -tu_pack_float32_for_unorm(float val, int bits) -{ - const uint32_t max = tu_pack_mask(bits); - if (val < 0.0f) - return 0; - else if (val > 1.0f) - return max; - else - return _mesa_lroundevenf(val * (float) max); -} - -static uint32_t -tu_pack_float32_for_snorm(float val, int bits) -{ - const int32_t max = tu_pack_mask(bits - 1); - int32_t tmp; - if (val < -1.0f) - tmp = -max; - else if (val > 1.0f) - tmp = max; - else - tmp = _mesa_lroundevenf(val * (float) max); - - return tmp & tu_pack_mask(bits); -} - -static uint32_t -tu_pack_float32_for_uscaled(float val, int bits) -{ - const uint32_t max = tu_pack_mask(bits); - if (val < 0.0f) - return 0; - else if (val > (float) max) - return max; - else - return (uint32_t) val; -} - -static uint32_t -tu_pack_float32_for_sscaled(float val, int bits) -{ - const int32_t max = tu_pack_mask(bits - 1); - const int32_t min = -max - 1; - int32_t tmp; - if (val < (float) min) - tmp = min; - else if (val > (float) max) - tmp = max; - else - tmp = (int32_t) val; - - return tmp & tu_pack_mask(bits); -} - -static uint32_t -tu_pack_uint32_for_uint(uint32_t val, int bits) -{ - return val & tu_pack_mask(bits); -} - -static uint32_t -tu_pack_int32_for_sint(int32_t val, int bits) -{ - return val & tu_pack_mask(bits); -} - -static uint32_t -tu_pack_float32_for_sfloat(float val, int bits) -{ - assert(bits == 16 || bits == 32); - return bits == 16 ? 
util_float_to_half(val) : fui(val); -} - -union tu_clear_component_value { - float float32; - int32_t int32; - uint32_t uint32; -}; - -static uint32_t -tu_pack_clear_component_value(union tu_clear_component_value val, - const struct vk_format_channel_description *ch) -{ - uint32_t packed; - - switch (ch->type) { - case VK_FORMAT_TYPE_UNSIGNED: - /* normalized, scaled, or pure integer */ - assert(ch->normalized + ch->scaled + ch->pure_integer == 1); - if (ch->normalized) - packed = tu_pack_float32_for_unorm(val.float32, ch->size); - else if (ch->scaled) - packed = tu_pack_float32_for_uscaled(val.float32, ch->size); - else - packed = tu_pack_uint32_for_uint(val.uint32, ch->size); - break; - case VK_FORMAT_TYPE_SIGNED: - /* normalized, scaled, or pure integer */ - assert(ch->normalized + ch->scaled + ch->pure_integer == 1); - if (ch->normalized) - packed = tu_pack_float32_for_snorm(val.float32, ch->size); - else if (ch->scaled) - packed = tu_pack_float32_for_sscaled(val.float32, ch->size); - else - packed = tu_pack_int32_for_sint(val.int32, ch->size); - break; - case VK_FORMAT_TYPE_FLOAT: - packed = tu_pack_float32_for_sfloat(val.float32, ch->size); - break; - default: - unreachable("unexpected channel type"); - packed = 0; - break; - } - - assert((packed & tu_pack_mask(ch->size)) == packed); - return packed; -} - -static const struct vk_format_channel_description * -tu_get_format_channel_description(const struct vk_format_description *desc, - int comp) -{ - switch (desc->swizzle[comp]) { - case VK_SWIZZLE_X: - return &desc->channel[0]; - case VK_SWIZZLE_Y: - return &desc->channel[1]; - case VK_SWIZZLE_Z: - return &desc->channel[2]; - case VK_SWIZZLE_W: - return &desc->channel[3]; - default: - return NULL; - } -} - -static union tu_clear_component_value -tu_get_clear_component_value(const VkClearValue *val, int comp, bool color) -{ - union tu_clear_component_value tmp; - if (color) { - assert(comp < 4); - tmp.uint32 = val->color.uint32[comp]; - } else { - assert(comp < 2); - if (comp == 0) - tmp.float32 = val->depthStencil.depth; - else - tmp.uint32 = val->depthStencil.stencil; - } - - return tmp; -} - -/** - * Pack a VkClearValue into a 128-bit buffer. \a format is respected except - * for the component order. The components are always packed in WZYX order - * (i.e., msb is white and lsb is red). - * - * Return the number of uint32_t's used. - */ -int -tu_pack_clear_value(const VkClearValue *val, VkFormat format, uint32_t buf[4]) -{ - const struct vk_format_description *desc = vk_format_description(format); - assert(desc && desc->layout == VK_FORMAT_LAYOUT_PLAIN); - - /* S8_UINT is special and has no depth */ - const int max_components = - format == VK_FORMAT_S8_UINT ? 
2 : desc->nr_channels; - - int buf_offset = 0; - int bit_shift = 0; - for (int comp = 0; comp < max_components; comp++) { - const struct vk_format_channel_description *ch = - tu_get_format_channel_description(desc, comp); - if (!ch) { - assert(format == VK_FORMAT_S8_UINT && comp == 0); - continue; - } - - union tu_clear_component_value v = tu_get_clear_component_value( - val, comp, desc->colorspace != VK_FORMAT_COLORSPACE_ZS); - - /* move to the next uint32_t when there is not enough space */ - assert(ch->size <= 32); - if (bit_shift + ch->size > 32) { - buf_offset++; - bit_shift = 0; - } - - if (bit_shift == 0) - buf[buf_offset] = 0; - - buf[buf_offset] |= tu_pack_clear_component_value(v, ch) << bit_shift; - bit_shift += ch->size; - } - - return buf_offset + 1; -} - -static void -tu_physical_device_get_format_properties( - struct tu_physical_device *physical_device, - VkFormat format, - VkFormatProperties *out_properties) -{ - VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0; - const struct vk_format_description *desc = vk_format_description(format); - const struct tu_native_format *native_fmt = tu6_get_native_format(format); - if (!desc || !native_fmt) { - out_properties->linearTilingFeatures = linear; - out_properties->optimalTilingFeatures = tiled; - out_properties->bufferFeatures = buffer; - return; - } - - linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; - tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; - buffer |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; - - if (native_fmt->tex >= 0) { - linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT; - } - - if (native_fmt->rb >= 0) { - linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; - } - - if (native_fmt->vtx >= 0) { - buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT; - } - - out_properties->linearTilingFeatures = linear; - out_properties->optimalTilingFeatures = tiled; - out_properties->bufferFeatures = buffer; -} - -void -tu_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties *pFormatProperties) -{ - TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); - - tu_physical_device_get_format_properties(physical_device, format, - pFormatProperties); -} - -void -tu_GetPhysicalDeviceFormatProperties2( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkFormatProperties2 *pFormatProperties) -{ - TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); - - tu_physical_device_get_format_properties( - physical_device, format, &pFormatProperties->formatProperties); -} - -static VkResult -tu_get_image_format_properties( - struct tu_physical_device *physical_device, - const VkPhysicalDeviceImageFormatInfo2 *info, - VkImageFormatProperties *pImageFormatProperties) - -{ - VkFormatProperties format_props; - VkFormatFeatureFlags format_feature_flags; - VkExtent3D maxExtent; - uint32_t maxMipLevels; - uint32_t maxArraySize; - VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; - - tu_physical_device_get_format_properties(physical_device, info->format, - &format_props); - if (info->tiling == VK_IMAGE_TILING_LINEAR) { - format_feature_flags = format_props.linearTilingFeatures; - } else if (info->tiling == VK_IMAGE_TILING_OPTIMAL) { - format_feature_flags = format_props.optimalTilingFeatures; - } else { - unreachable("bad 
VkImageTiling"); - } - - if (format_feature_flags == 0) - goto unsupported; - - if (info->type != VK_IMAGE_TYPE_2D && - vk_format_is_depth_or_stencil(info->format)) - goto unsupported; - - switch (info->type) { - default: - unreachable("bad vkimage type\n"); - case VK_IMAGE_TYPE_1D: - maxExtent.width = 16384; - maxExtent.height = 1; - maxExtent.depth = 1; - maxMipLevels = 15; /* log2(maxWidth) + 1 */ - maxArraySize = 2048; - break; - case VK_IMAGE_TYPE_2D: - maxExtent.width = 16384; - maxExtent.height = 16384; - maxExtent.depth = 1; - maxMipLevels = 15; /* log2(maxWidth) + 1 */ - maxArraySize = 2048; - break; - case VK_IMAGE_TYPE_3D: - maxExtent.width = 2048; - maxExtent.height = 2048; - maxExtent.depth = 2048; - maxMipLevels = 12; /* log2(maxWidth) + 1 */ - maxArraySize = 1; - break; - } - - if (info->tiling == VK_IMAGE_TILING_OPTIMAL && - info->type == VK_IMAGE_TYPE_2D && - (format_feature_flags & - (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) && - !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) && - !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { - sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | - VK_SAMPLE_COUNT_8_BIT; - } - - if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) { - goto unsupported; - } - } - - if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) { - goto unsupported; - } - } - - if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) { - if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) { - goto unsupported; - } - } - - if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) { - if (!(format_feature_flags & - VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { - goto unsupported; - } - } - - *pImageFormatProperties = (VkImageFormatProperties) { - .maxExtent = maxExtent, - .maxMipLevels = maxMipLevels, - .maxArrayLayers = maxArraySize, - .sampleCounts = sampleCounts, - - /* FINISHME: Accurately calculate - * VkImageFormatProperties::maxResourceSize. 
- */ - .maxResourceSize = UINT32_MAX, - }; - - return VK_SUCCESS; -unsupported: - *pImageFormatProperties = (VkImageFormatProperties) { - .maxExtent = { 0, 0, 0 }, - .maxMipLevels = 0, - .maxArrayLayers = 0, - .sampleCounts = 0, - .maxResourceSize = 0, - }; - - return VK_ERROR_FORMAT_NOT_SUPPORTED; -} - -VkResult -tu_GetPhysicalDeviceImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - VkImageTiling tiling, - VkImageUsageFlags usage, - VkImageCreateFlags createFlags, - VkImageFormatProperties *pImageFormatProperties) -{ - TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); - - const VkPhysicalDeviceImageFormatInfo2 info = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, - .pNext = NULL, - .format = format, - .type = type, - .tiling = tiling, - .usage = usage, - .flags = createFlags, - }; - - return tu_get_image_format_properties(physical_device, &info, - pImageFormatProperties); -} - -static VkResult -tu_get_external_image_format_properties( - const struct tu_physical_device *physical_device, - const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo, - VkExternalMemoryHandleTypeFlagBits handleType, - VkExternalMemoryProperties *external_properties) -{ - VkExternalMemoryFeatureFlagBits flags = 0; - VkExternalMemoryHandleTypeFlags export_flags = 0; - VkExternalMemoryHandleTypeFlags compat_flags = 0; - - /* From the Vulkan 1.1.98 spec: - * - * If handleType is not compatible with the format, type, tiling, - * usage, and flags specified in VkPhysicalDeviceImageFormatInfo2, - * then vkGetPhysicalDeviceImageFormatProperties2 returns - * VK_ERROR_FORMAT_NOT_SUPPORTED. - */ - - switch (handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: - switch (pImageFormatInfo->type) { - case VK_IMAGE_TYPE_2D: - flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT | - VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | - VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; - compat_flags = export_flags = - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; - break; - default: - return vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, - "VkExternalMemoryTypeFlagBits(0x%x) unsupported for VkImageType(%d)", - handleType, pImageFormatInfo->type); - } - break; - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; - compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; - break; - default: - return vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED, - "VkExternalMemoryTypeFlagBits(0x%x) unsupported", - handleType); - } - - *external_properties = (VkExternalMemoryProperties) { - .externalMemoryFeatures = flags, - .exportFromImportedHandleTypes = export_flags, - .compatibleHandleTypes = compat_flags, - }; - - return VK_SUCCESS; -} - -VkResult -tu_GetPhysicalDeviceImageFormatProperties2( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceImageFormatInfo2 *base_info, - VkImageFormatProperties2 *base_props) -{ - TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice); - const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL; - VkExternalImageFormatProperties *external_props = NULL; - VkResult result; - - result = tu_get_image_format_properties( - physical_device, base_info, &base_props->imageFormatProperties); - if (result != VK_SUCCESS) - return result; - - /* Extract input structs */ - 
vk_foreach_struct_const(s, base_info->pNext) - { - switch (s->sType) { - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO: - external_info = (const void *) s; - break; - default: - break; - } - } - - /* Extract output structs */ - vk_foreach_struct(s, base_props->pNext) - { - switch (s->sType) { - case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: - external_props = (void *) s; - break; - default: - break; - } - } - - /* From the Vulkan 1.0.42 spec: - * - * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will - * behave as if VkPhysicalDeviceExternalImageFormatInfo was not - * present and VkExternalImageFormatProperties will be ignored. - */ - if (external_info && external_info->handleType != 0) { - result = tu_get_external_image_format_properties( - physical_device, base_info, external_info->handleType, - &external_props->externalMemoryProperties); - if (result != VK_SUCCESS) - goto fail; - } - - return VK_SUCCESS; - -fail: - if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) { - /* From the Vulkan 1.0.42 spec: - * - * If the combination of parameters to - * vkGetPhysicalDeviceImageFormatProperties2 is not supported by - * the implementation for use in vkCreateImage, then all members of - * imageFormatProperties will be filled with zero. - */ - base_props->imageFormatProperties = (VkImageFormatProperties) { 0 }; - } - - return result; -} - -void -tu_GetPhysicalDeviceSparseImageFormatProperties( - VkPhysicalDevice physicalDevice, - VkFormat format, - VkImageType type, - uint32_t samples, - VkImageUsageFlags usage, - VkImageTiling tiling, - uint32_t *pNumProperties, - VkSparseImageFormatProperties *pProperties) -{ - /* Sparse images are not yet supported. */ - *pNumProperties = 0; -} - -void -tu_GetPhysicalDeviceSparseImageFormatProperties2( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, - uint32_t *pPropertyCount, - VkSparseImageFormatProperties2 *pProperties) -{ - /* Sparse images are not yet supported. */ - *pPropertyCount = 0; -} - -void -tu_GetPhysicalDeviceExternalBufferProperties( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, - VkExternalBufferProperties *pExternalBufferProperties) -{ - VkExternalMemoryFeatureFlagBits flags = 0; - VkExternalMemoryHandleTypeFlags export_flags = 0; - VkExternalMemoryHandleTypeFlags compat_flags = 0; - switch (pExternalBufferInfo->handleType) { - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | - VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; - compat_flags = export_flags = - VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT | - VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; - break; - case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: - flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT; - compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; - break; - default: - break; - } - pExternalBufferProperties->externalMemoryProperties = - (VkExternalMemoryProperties) { - .externalMemoryFeatures = flags, - .exportFromImportedHandleTypes = export_flags, - .compatibleHandleTypes = compat_flags, - }; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_image.c b/lib/mesa/src/freedreno/vulkan/tu_image.c deleted file mode 100644 index 657612d42..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_image.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright © 2016 Red Hat. 
- * Copyright © 2016 Bas Nieuwenhuizen - * - * based in part on anv driver which is: - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include "util/debug.h" -#include "util/u_atomic.h" -#include "vk_format.h" -#include "vk_util.h" - -static inline bool -image_level_linear(struct tu_image *image, int level) -{ - unsigned w = u_minify(image->extent.width, level); - return w < 16; -} - -/* indexed by cpp: */ -static const struct -{ - unsigned pitchalign; - unsigned heightalign; -} tile_alignment[] = { - [1] = { 128, 32 }, [2] = { 128, 16 }, [3] = { 128, 16 }, [4] = { 64, 16 }, - [8] = { 64, 16 }, [12] = { 64, 16 }, [16] = { 64, 16 }, -}; - -static void -setup_slices(struct tu_image *image, const VkImageCreateInfo *pCreateInfo) -{ - enum vk_format_layout layout = - vk_format_description(pCreateInfo->format)->layout; - uint32_t layer_size = 0; - uint32_t width = pCreateInfo->extent.width; - uint32_t height = pCreateInfo->extent.height; - uint32_t depth = pCreateInfo->extent.depth; - bool layer_first = pCreateInfo->imageType != VK_IMAGE_TYPE_3D; - uint32_t alignment = pCreateInfo->imageType == VK_IMAGE_TYPE_3D ? 4096 : 1; - uint32_t cpp = vk_format_get_blocksize(pCreateInfo->format); - - uint32_t heightalign = tile_alignment[cpp].heightalign; - - for (unsigned level = 0; level < pCreateInfo->mipLevels; level++) { - struct tu_image_level *slice = &image->levels[level]; - bool linear_level = image_level_linear(image, level); - uint32_t aligned_height = height; - uint32_t blocks; - uint32_t pitchalign; - - if (image->tile_mode && !linear_level) { - pitchalign = tile_alignment[cpp].pitchalign; - aligned_height = align(aligned_height, heightalign); - } else { - pitchalign = 64; - - /* The blits used for mem<->gmem work at a granularity of - * 32x32, which can cause faults due to over-fetch on the - * last level. The simple solution is to over-allocate a - * bit the last level to ensure any over-fetch is harmless. 
- * The pitch is already sufficiently aligned, but height - * may not be: - */ - if ((level + 1 == pCreateInfo->mipLevels)) - aligned_height = align(aligned_height, 32); - } - - if (layout == VK_FORMAT_LAYOUT_ASTC) - slice->pitch = util_align_npot( - width, - pitchalign * vk_format_get_blockwidth(pCreateInfo->format)); - else - slice->pitch = align(width, pitchalign); - - slice->offset = layer_size; - blocks = vk_format_get_block_count(pCreateInfo->format, slice->pitch, - aligned_height); - - /* 1d array and 2d array textures must all have the same layer size - * for each miplevel on a3xx. 3d textures can have different layer - * sizes for high levels, but the hw auto-sizer is buggy (or at least - * different than what this code does), so as soon as the layer size - * range gets into range, we stop reducing it. - */ - if (pCreateInfo->imageType == VK_IMAGE_TYPE_3D && - (level == 1 || - (level > 1 && image->levels[level - 1].size > 0xf000))) - slice->size = align(blocks * cpp, alignment); - else if (level == 0 || layer_first || alignment == 1) - slice->size = align(blocks * cpp, alignment); - else - slice->size = image->levels[level - 1].size; - - layer_size += slice->size * depth; - - width = u_minify(width, 1); - height = u_minify(height, 1); - depth = u_minify(depth, 1); - } - - image->layer_size = layer_size; -} - -VkResult -tu_image_create(VkDevice _device, - const struct tu_image_create_info *create_info, - const VkAllocationCallbacks *alloc, - VkImage *pImage) -{ - TU_FROM_HANDLE(tu_device, device, _device); - const VkImageCreateInfo *pCreateInfo = create_info->vk_info; - struct tu_image *image = NULL; - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO); - - tu_assert(pCreateInfo->mipLevels > 0); - tu_assert(pCreateInfo->arrayLayers > 0); - tu_assert(pCreateInfo->samples > 0); - tu_assert(pCreateInfo->extent.width > 0); - tu_assert(pCreateInfo->extent.height > 0); - tu_assert(pCreateInfo->extent.depth > 0); - - image = vk_zalloc2(&device->alloc, alloc, sizeof(*image), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!image) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - image->type = pCreateInfo->imageType; - - image->vk_format = pCreateInfo->format; - image->tiling = pCreateInfo->tiling; - image->usage = pCreateInfo->usage; - image->flags = pCreateInfo->flags; - image->extent = pCreateInfo->extent; - image->level_count = pCreateInfo->mipLevels; - image->layer_count = pCreateInfo->arrayLayers; - - image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE; - if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) { - for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i) - if (pCreateInfo->pQueueFamilyIndices[i] == - VK_QUEUE_FAMILY_EXTERNAL) - image->queue_family_mask |= (1u << TU_MAX_QUEUE_FAMILIES) - 1u; - else - image->queue_family_mask |= - 1u << pCreateInfo->pQueueFamilyIndices[i]; - } - - image->shareable = - vk_find_struct_const(pCreateInfo->pNext, - EXTERNAL_MEMORY_IMAGE_CREATE_INFO) != NULL; - - image->tile_mode = pCreateInfo->tiling == VK_IMAGE_TILING_OPTIMAL ? 
3 : 0; - setup_slices(image, pCreateInfo); - - image->size = image->layer_size * pCreateInfo->arrayLayers; - *pImage = tu_image_to_handle(image); - - return VK_SUCCESS; -} - -void -tu_image_view_init(struct tu_image_view *iview, - struct tu_device *device, - const VkImageViewCreateInfo *pCreateInfo) -{ - TU_FROM_HANDLE(tu_image, image, pCreateInfo->image); - const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; - - switch (image->type) { - case VK_IMAGE_TYPE_1D: - case VK_IMAGE_TYPE_2D: - assert(range->baseArrayLayer + tu_get_layerCount(image, range) <= - image->layer_count); - break; - case VK_IMAGE_TYPE_3D: - assert(range->baseArrayLayer + tu_get_layerCount(image, range) <= - tu_minify(image->extent.depth, range->baseMipLevel)); - break; - default: - unreachable("bad VkImageType"); - } - - iview->image = image; - iview->type = pCreateInfo->viewType; - iview->vk_format = pCreateInfo->format; - iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask; - - if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) { - iview->vk_format = vk_format_stencil_only(iview->vk_format); - } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) { - iview->vk_format = vk_format_depth_only(iview->vk_format); - } - - // should we minify? - iview->extent = image->extent; - - iview->base_layer = range->baseArrayLayer; - iview->layer_count = tu_get_layerCount(image, range); - iview->base_mip = range->baseMipLevel; - iview->level_count = tu_get_levelCount(image, range); -} - -unsigned -tu_image_queue_family_mask(const struct tu_image *image, - uint32_t family, - uint32_t queue_family) -{ - if (!image->exclusive) - return image->queue_family_mask; - if (family == VK_QUEUE_FAMILY_EXTERNAL) - return (1u << TU_MAX_QUEUE_FAMILIES) - 1u; - if (family == VK_QUEUE_FAMILY_IGNORED) - return 1u << queue_family; - return 1u << family; -} - -VkResult -tu_CreateImage(VkDevice device, - const VkImageCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImage *pImage) -{ -#ifdef ANDROID - const VkNativeBufferANDROID *gralloc_info = - vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID); - - if (gralloc_info) - return tu_image_from_gralloc(device, pCreateInfo, gralloc_info, - pAllocator, pImage); -#endif - - return tu_image_create(device, - &(struct tu_image_create_info) { - .vk_info = pCreateInfo, - .scanout = false, - }, - pAllocator, pImage); -} - -void -tu_DestroyImage(VkDevice _device, - VkImage _image, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_image, image, _image); - - if (!image) - return; - - if (image->owned_memory != VK_NULL_HANDLE) - tu_FreeMemory(_device, image->owned_memory, pAllocator); - - vk_free2(&device->alloc, pAllocator, image); -} - -void -tu_GetImageSubresourceLayout(VkDevice _device, - VkImage _image, - const VkImageSubresource *pSubresource, - VkSubresourceLayout *pLayout) -{ - TU_FROM_HANDLE(tu_image, image, _image); - - const uint32_t layer_offset = image->layer_size * pSubresource->arrayLayer; - const struct tu_image_level *level = - image->levels + pSubresource->mipLevel; - - pLayout->offset = layer_offset + level->offset; - pLayout->size = level->size; - pLayout->rowPitch = - level->pitch * vk_format_get_blocksize(image->vk_format); - pLayout->arrayPitch = image->layer_size; - pLayout->depthPitch = level->size; -} - -VkResult -tu_CreateImageView(VkDevice _device, - const VkImageViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkImageView *pView) -{ - 
TU_FROM_HANDLE(tu_device, device, _device); - struct tu_image_view *view; - - view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (view == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - tu_image_view_init(view, device, pCreateInfo); - - *pView = tu_image_view_to_handle(view); - - return VK_SUCCESS; -} - -void -tu_DestroyImageView(VkDevice _device, - VkImageView _iview, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_image_view, iview, _iview); - - if (!iview) - return; - vk_free2(&device->alloc, pAllocator, iview); -} - -void -tu_buffer_view_init(struct tu_buffer_view *view, - struct tu_device *device, - const VkBufferViewCreateInfo *pCreateInfo) -{ - TU_FROM_HANDLE(tu_buffer, buffer, pCreateInfo->buffer); - - view->range = pCreateInfo->range == VK_WHOLE_SIZE - ? buffer->size - pCreateInfo->offset - : pCreateInfo->range; - view->vk_format = pCreateInfo->format; -} - -VkResult -tu_CreateBufferView(VkDevice _device, - const VkBufferViewCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkBufferView *pView) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_buffer_view *view; - - view = vk_alloc2(&device->alloc, pAllocator, sizeof(*view), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!view) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - tu_buffer_view_init(view, device, pCreateInfo); - - *pView = tu_buffer_view_to_handle(view); - - return VK_SUCCESS; -} - -void -tu_DestroyBufferView(VkDevice _device, - VkBufferView bufferView, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_buffer_view, view, bufferView); - - if (!view) - return; - - vk_free2(&device->alloc, pAllocator, view); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_kgsl.c b/lib/mesa/src/freedreno/vulkan/tu_kgsl.c deleted file mode 100644 index 6d968415f..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_kgsl.c +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Copyright © 2020 Google, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "tu_private.h" - -#include <errno.h> -#include <fcntl.h> -#include <stdint.h> -#include <sys/ioctl.h> -#include <sys/mman.h> - -#include "msm_kgsl.h" -#include "vk_util.h" - -struct tu_syncobj { - struct vk_object_base base; - uint32_t timestamp; - bool timestamp_valid; -}; - -static int -safe_ioctl(int fd, unsigned long request, void *arg) -{ - int ret; - - do { - ret = ioctl(fd, request, arg); - } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); - - return ret; -} - -int -tu_drm_submitqueue_new(const struct tu_device *dev, - int priority, - uint32_t *queue_id) -{ - struct kgsl_drawctxt_create req = { - .flags = KGSL_CONTEXT_SAVE_GMEM | - KGSL_CONTEXT_NO_GMEM_ALLOC | - KGSL_CONTEXT_PREAMBLE, - }; - - int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req); - if (ret) - return ret; - - *queue_id = req.drawctxt_id; - - return 0; -} - -void -tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id) -{ - struct kgsl_drawctxt_destroy req = { - .drawctxt_id = queue_id, - }; - - safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req); -} - -VkResult -tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size, bool dump) -{ - struct kgsl_gpumem_alloc_id req = { - .size = size, - }; - int ret; - - ret = safe_ioctl(dev->physical_device->local_fd, - IOCTL_KGSL_GPUMEM_ALLOC_ID, &req); - if (ret) { - return vk_errorf(dev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY, - "GPUMEM_ALLOC_ID failed (%s)", strerror(errno)); - } - - *bo = (struct tu_bo) { - .gem_handle = req.id, - .size = req.mmapsize, - .iova = req.gpuaddr, - }; - - return VK_SUCCESS; -} - -VkResult -tu_bo_init_dmabuf(struct tu_device *dev, - struct tu_bo *bo, - uint64_t size, - int fd) -{ - struct kgsl_gpuobj_import_dma_buf import_dmabuf = { - .fd = fd, - }; - struct kgsl_gpuobj_import req = { - .priv = (uintptr_t)&import_dmabuf, - .priv_len = sizeof(import_dmabuf), - .flags = 0, - .type = KGSL_USER_MEM_TYPE_DMABUF, - }; - int ret; - - ret = safe_ioctl(dev->physical_device->local_fd, - IOCTL_KGSL_GPUOBJ_IMPORT, &req); - if (ret) - return vk_errorf(dev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY, - "Failed to import dma-buf (%s)\n", strerror(errno)); - - struct kgsl_gpuobj_info info_req = { - .id = req.id, - }; - - ret = safe_ioctl(dev->physical_device->local_fd, - IOCTL_KGSL_GPUOBJ_INFO, &info_req); - if (ret) - return vk_errorf(dev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY, - "Failed to get dma-buf info (%s)\n", strerror(errno)); - - *bo = (struct tu_bo) { - .gem_handle = req.id, - .size = info_req.size, - .iova = info_req.gpuaddr, - }; - - return VK_SUCCESS; -} - -int -tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo) -{ - tu_stub(); - - return -1; -} - -VkResult -tu_bo_map(struct tu_device *dev, struct tu_bo *bo) -{ - if (bo->map) - return VK_SUCCESS; - - uint64_t offset = bo->gem_handle << 12; - void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, - dev->physical_device->local_fd, offset); - if (map == MAP_FAILED) - return vk_error(dev->instance, VK_ERROR_MEMORY_MAP_FAILED); - - bo->map = map; - - return VK_SUCCESS; -} - -void -tu_bo_finish(struct tu_device *dev, struct tu_bo *bo) -{ - assert(bo->gem_handle); - - if (bo->map) - munmap(bo->map, bo->size); - - struct kgsl_gpumem_free_id req = { - .id = bo->gem_handle - }; - - safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req); -} - -static VkResult -get_kgsl_prop(int fd, unsigned int type, void *value, size_t size) -{ - struct 
kgsl_device_getproperty getprop = { - .type = type, - .value = value, - .sizebytes = size, - }; - - return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop); -} - -VkResult -tu_enumerate_devices(struct tu_instance *instance) -{ - static const char path[] = "/dev/kgsl-3d0"; - int fd; - - struct tu_physical_device *device = &instance->physical_devices[0]; - - if (instance->vk.enabled_extensions.KHR_display) - return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "I can't KHR_display"); - - fd = open(path, O_RDWR | O_CLOEXEC); - if (fd < 0) { - instance->physical_device_count = 0; - return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "failed to open device %s", path); - } - - struct kgsl_devinfo info; - if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info))) - goto fail; - - uint64_t gmem_iova; - if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova))) - goto fail; - - /* kgsl version check? */ - - if (instance->debug_flags & TU_DEBUG_STARTUP) - mesa_logi("Found compatible device '%s'.", path); - - device->instance = instance; - device->master_fd = -1; - device->local_fd = fd; - - device->gpu_id = - ((info.chip_id >> 24) & 0xff) * 100 + - ((info.chip_id >> 16) & 0xff) * 10 + - ((info.chip_id >> 8) & 0xff); - device->gmem_size = info.gmem_sizebytes; - device->gmem_base = gmem_iova; - - device->heap.size = tu_get_system_heap_size(); - device->heap.used = 0u; - device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT; - - if (tu_physical_device_init(device, instance) != VK_SUCCESS) - goto fail; - - instance->physical_device_count = 1; - - return VK_SUCCESS; - -fail: - close(fd); - return VK_ERROR_INITIALIZATION_FAILED; -} - -static int -timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp) -{ - int fd; - struct kgsl_timestamp_event event = { - .type = KGSL_TIMESTAMP_EVENT_FENCE, - .context_id = queue->msm_queue_id, - .timestamp = timestamp, - .priv = &fd, - .len = sizeof(fd), - }; - - int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event); - if (ret) - return -1; - - return fd; -} - -/* return true if timestamp a is greater (more recent) then b - * this relies on timestamps never having a difference > (1<<31) - */ -static inline bool -timestamp_cmp(uint32_t a, uint32_t b) -{ - return (int32_t) (a - b) >= 0; -} - -static uint32_t -max_ts(uint32_t a, uint32_t b) -{ - return timestamp_cmp(a, b) ? a : b; -} - -static uint32_t -min_ts(uint32_t a, uint32_t b) -{ - return timestamp_cmp(a, b) ? 
b : a; -} - -static struct tu_syncobj -sync_merge(const VkSemaphore *syncobjs, uint32_t count, bool wait_all, bool reset) -{ - struct tu_syncobj ret; - - ret.timestamp_valid = false; - - for (uint32_t i = 0; i < count; ++i) { - TU_FROM_HANDLE(tu_syncobj, sync, syncobjs[i]); - - /* TODO: this means the fence is unsignaled and will never become signaled */ - if (!sync->timestamp_valid) - continue; - - if (!ret.timestamp_valid) - ret.timestamp = sync->timestamp; - else if (wait_all) - ret.timestamp = max_ts(ret.timestamp, sync->timestamp); - else - ret.timestamp = min_ts(ret.timestamp, sync->timestamp); - - ret.timestamp_valid = true; - if (reset) - sync->timestamp_valid = false; - - } - return ret; -} - -VkResult -tu_QueueSubmit(VkQueue _queue, - uint32_t submitCount, - const VkSubmitInfo *pSubmits, - VkFence _fence) -{ - TU_FROM_HANDLE(tu_queue, queue, _queue); - TU_FROM_HANDLE(tu_syncobj, fence, _fence); - VkResult result = VK_SUCCESS; - - uint32_t max_entry_count = 0; - for (uint32_t i = 0; i < submitCount; ++i) { - const VkSubmitInfo *submit = pSubmits + i; - - const VkPerformanceQuerySubmitInfoKHR *perf_info = - vk_find_struct_const(pSubmits[i].pNext, - PERFORMANCE_QUERY_SUBMIT_INFO_KHR); - - uint32_t entry_count = 0; - for (uint32_t j = 0; j < submit->commandBufferCount; ++j) { - TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]); - entry_count += cmdbuf->cs.entry_count; - if (perf_info) - entry_count++; - } - - max_entry_count = MAX2(max_entry_count, entry_count); - } - - struct kgsl_command_object *cmds = - vk_alloc(&queue->device->vk.alloc, - sizeof(cmds[0]) * max_entry_count, 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (cmds == NULL) - return vk_error(queue->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - for (uint32_t i = 0; i < submitCount; ++i) { - const VkSubmitInfo *submit = pSubmits + i; - uint32_t entry_idx = 0; - const VkPerformanceQuerySubmitInfoKHR *perf_info = - vk_find_struct_const(pSubmits[i].pNext, - PERFORMANCE_QUERY_SUBMIT_INFO_KHR); - - - for (uint32_t j = 0; j < submit->commandBufferCount; j++) { - TU_FROM_HANDLE(tu_cmd_buffer, cmdbuf, submit->pCommandBuffers[j]); - struct tu_cs *cs = &cmdbuf->cs; - - if (perf_info) { - struct tu_cs_entry *perf_cs_entry = - &cmdbuf->device->perfcntrs_pass_cs_entries[perf_info->counterPassIndex]; - - cmds[entry_idx++] = (struct kgsl_command_object) { - .offset = perf_cs_entry->offset, - .gpuaddr = perf_cs_entry->bo->iova, - .size = perf_cs_entry->size, - .flags = KGSL_CMDLIST_IB, - .id = perf_cs_entry->bo->gem_handle, - }; - } - - for (unsigned k = 0; k < cs->entry_count; k++) { - cmds[entry_idx++] = (struct kgsl_command_object) { - .offset = cs->entries[k].offset, - .gpuaddr = cs->entries[k].bo->iova, - .size = cs->entries[k].size, - .flags = KGSL_CMDLIST_IB, - .id = cs->entries[k].bo->gem_handle, - }; - } - } - - struct tu_syncobj s = sync_merge(submit->pWaitSemaphores, - submit->waitSemaphoreCount, - true, true); - - struct kgsl_cmd_syncpoint_timestamp ts = { - .context_id = queue->msm_queue_id, - .timestamp = s.timestamp, - }; - struct kgsl_command_syncpoint sync = { - .type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP, - .size = sizeof(ts), - .priv = (uintptr_t) &ts, - }; - - struct kgsl_gpu_command req = { - .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST, - .context_id = queue->msm_queue_id, - .cmdlist = (uint64_t) (uintptr_t) cmds, - .numcmds = entry_idx, - .cmdsize = sizeof(struct kgsl_command_object), - .synclist = (uintptr_t) &sync, - .syncsize = sizeof(struct kgsl_command_syncpoint), - .numsyncs = s.timestamp_valid ? 
1 : 0, - }; - - int ret = safe_ioctl(queue->device->physical_device->local_fd, - IOCTL_KGSL_GPU_COMMAND, &req); - if (ret) { - result = tu_device_set_lost(queue->device, - "submit failed: %s\n", strerror(errno)); - goto fail; - } - - for (uint32_t i = 0; i < submit->signalSemaphoreCount; i++) { - TU_FROM_HANDLE(tu_syncobj, sem, submit->pSignalSemaphores[i]); - sem->timestamp = req.timestamp; - sem->timestamp_valid = true; - } - - /* no need to merge fences as queue execution is serialized */ - if (i == submitCount - 1) { - int fd = timestamp_to_fd(queue, req.timestamp); - if (fd < 0) { - result = tu_device_set_lost(queue->device, - "Failed to create sync file for timestamp: %s\n", - strerror(errno)); - goto fail; - } - - if (queue->fence >= 0) - close(queue->fence); - queue->fence = fd; - - if (fence) { - fence->timestamp = req.timestamp; - fence->timestamp_valid = true; - } - } - } -fail: - vk_free(&queue->device->vk.alloc, cmds); - - return result; -} - -static VkResult -sync_create(VkDevice _device, - bool signaled, - bool fence, - const VkAllocationCallbacks *pAllocator, - void **p_sync) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - struct tu_syncobj *sync = - vk_object_alloc(&device->vk, pAllocator, sizeof(*sync), - fence ? VK_OBJECT_TYPE_FENCE : VK_OBJECT_TYPE_SEMAPHORE); - if (!sync) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - if (signaled) - tu_finishme("CREATE FENCE SIGNALED"); - - sync->timestamp_valid = false; - *p_sync = sync; - - return VK_SUCCESS; -} - -VkResult -tu_ImportSemaphoreFdKHR(VkDevice _device, - const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo) -{ - tu_finishme("ImportSemaphoreFdKHR"); - return VK_SUCCESS; -} - -VkResult -tu_GetSemaphoreFdKHR(VkDevice _device, - const VkSemaphoreGetFdInfoKHR *pGetFdInfo, - int *pFd) -{ - tu_finishme("GetSemaphoreFdKHR"); - return VK_SUCCESS; -} - -VkResult -tu_CreateSemaphore(VkDevice device, - const VkSemaphoreCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSemaphore *pSemaphore) -{ - return sync_create(device, false, false, pAllocator, (void**) pSemaphore); -} - -void -tu_DestroySemaphore(VkDevice _device, - VkSemaphore semaphore, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_syncobj, sync, semaphore); - - if (!sync) - return; - - vk_object_free(&device->vk, pAllocator, sync); -} - -VkResult -tu_ImportFenceFdKHR(VkDevice _device, - const VkImportFenceFdInfoKHR *pImportFenceFdInfo) -{ - tu_stub(); - - return VK_SUCCESS; -} - -VkResult -tu_GetFenceFdKHR(VkDevice _device, - const VkFenceGetFdInfoKHR *pGetFdInfo, - int *pFd) -{ - tu_stub(); - - return VK_SUCCESS; -} - -VkResult -tu_CreateFence(VkDevice device, - const VkFenceCreateInfo *info, - const VkAllocationCallbacks *pAllocator, - VkFence *pFence) -{ - return sync_create(device, info->flags & VK_FENCE_CREATE_SIGNALED_BIT, true, - pAllocator, (void**) pFence); -} - -void -tu_DestroyFence(VkDevice _device, VkFence fence, const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_syncobj, sync, fence); - - if (!sync) - return; - - vk_object_free(&device->vk, pAllocator, sync); -} - -VkResult -tu_WaitForFences(VkDevice _device, - uint32_t count, - const VkFence *pFences, - VkBool32 waitAll, - uint64_t timeout) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_syncobj s = sync_merge((const VkSemaphore*) pFences, count, waitAll, false); - - if (!s.timestamp_valid) - return VK_SUCCESS; - - int 
ret = ioctl(device->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, - &(struct kgsl_device_waittimestamp_ctxtid) { - .context_id = device->queues[0]->msm_queue_id, - .timestamp = s.timestamp, - .timeout = timeout / 1000000, - }); - if (ret) { - assert(errno == ETIME); - return VK_TIMEOUT; - } - - return VK_SUCCESS; -} - -VkResult -tu_ResetFences(VkDevice _device, uint32_t count, const VkFence *pFences) -{ - for (uint32_t i = 0; i < count; i++) { - TU_FROM_HANDLE(tu_syncobj, sync, pFences[i]); - sync->timestamp_valid = false; - } - return VK_SUCCESS; -} - -VkResult -tu_GetFenceStatus(VkDevice _device, VkFence _fence) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_syncobj, sync, _fence); - - if (!sync->timestamp_valid) - return VK_NOT_READY; - - int ret = ioctl(device->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, - &(struct kgsl_device_waittimestamp_ctxtid) { - .context_id = device->queues[0]->msm_queue_id, - .timestamp = sync->timestamp, - .timeout = 0, - }); - if (ret) { - assert(errno == ETIME); - return VK_NOT_READY; - } - - return VK_SUCCESS; -} - -int -tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2) -{ - tu_finishme("tu_signal_fences"); - return 0; -} - -int -tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync) -{ - tu_finishme("tu_syncobj_to_fd"); - return -1; -} - -#ifdef ANDROID -VkResult -tu_QueueSignalReleaseImageANDROID(VkQueue _queue, - uint32_t waitSemaphoreCount, - const VkSemaphore *pWaitSemaphores, - VkImage image, - int *pNativeFenceFd) -{ - TU_FROM_HANDLE(tu_queue, queue, _queue); - if (!pNativeFenceFd) - return VK_SUCCESS; - - struct tu_syncobj s = sync_merge(pWaitSemaphores, waitSemaphoreCount, true, true); - - if (!s.timestamp_valid) { - *pNativeFenceFd = -1; - return VK_SUCCESS; - } - - *pNativeFenceFd = timestamp_to_fd(queue, s.timestamp); - - return VK_SUCCESS; -} -#endif diff --git a/lib/mesa/src/freedreno/vulkan/tu_lrz.c b/lib/mesa/src/freedreno/vulkan/tu_lrz.c deleted file mode 100644 index 18a5d158a..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_lrz.c +++ /dev/null @@ -1,865 +0,0 @@ -/* - * Copyright © 2022 Igalia S.L. - * SPDX-License-Identifier: MIT - */ - -#include "tu_lrz.h" - -#include "tu_clear_blit.h" -#include "tu_cmd_buffer.h" -#include "tu_cs.h" -#include "tu_image.h" - -/* Low-resolution Z buffer is very similar to a depth prepass that helps - * the HW avoid executing the fragment shader on those fragments that will - * be subsequently discarded by the depth test afterwards. - * - * The interesting part of this feature is that it allows applications - * to submit the vertices in any order. - * - * In the binning pass it is possible to store the depth value of each - * vertex into internal low resolution depth buffer and quickly test - * the primitives against it during the render pass. - * - * There are a number of limitations when LRZ cannot be used: - * - Fragment shader side-effects (writing to SSBOs, atomic operations, etc); - * - Writing to stencil buffer - * - Writing depth while: - * - Changing direction of depth test (e.g. from OP_GREATER to OP_LESS); - * - Using OP_ALWAYS or OP_NOT_EQUAL; - * - Clearing depth with vkCmdClearAttachments; - * - (pre-a650) Not clearing depth attachment with LOAD_OP_CLEAR; - * - (pre-a650) Using secondary command buffers; - * - Sysmem rendering (with small caveat). - * - * Pre-a650 (before gen3) - * ====================== - * - * The direction is fully tracked on CPU. 
In renderpass LRZ starts with - * unknown direction, the direction is set first time when depth write occurs - * and if it does change afterwards - direction becomes invalid and LRZ is - * disabled for the rest of the renderpass. - * - * Since direction is not tracked by GPU - it's impossible to know whether - * LRZ is enabled during construction of secondary command buffers. - * - * For the same reason it's impossible to reuse LRZ between renderpasses. - * - * A650+ (gen3+) - * ============= - * - * Now LRZ direction could be tracked on GPU. There are to parts: - * - Direction byte which stores current LRZ direction; - * - Parameters of the last used depth view. - * - * The idea is the same as when LRZ tracked on CPU: when GRAS_LRZ_CNTL - * is used - its direction is compared to previously known direction - * and direction byte is set to disabled when directions are incompatible. - * - * Additionally, to reuse LRZ between renderpasses, GRAS_LRZ_CNTL checks - * if current value of GRAS_LRZ_DEPTH_VIEW is equal to the value - * stored in the buffer, if not - LRZ is disabled. (This is necessary - * because depth buffer may have several layers and mip levels, on the - * other hand LRZ buffer represents only a single layer + mip level). - * - * LRZ direction between renderpasses is disabled when underlying depth - * buffer is changed, the following commands could change depth image: - * - vkCmdBlitImage* - * - vkCmdCopyBufferToImage* - * - vkCmdCopyImage* - * - * LRZ Fast-Clear - * ============== - * - * The LRZ fast-clear buffer is initialized to zeroes and read/written - * when GRAS_LRZ_CNTL.FC_ENABLE (b3) is set. It appears to store 1b/block. - * '0' means block has original depth clear value, and '1' means that the - * corresponding block in LRZ has been modified. - * - * LRZ fast-clear conservatively clears LRZ buffer, at the point where LRZ is - * written the LRZ block which corresponds to a single fast-clear bit is cleared: - * - To 0.0 if depth comparison is GREATER; - * - To 1.0 if depth comparison is LESS; - * - * This way it's always valid to fast-clear. On the other hand we disable - * fast-clear if depth clear value is not 0.0 or 1.0 because it may be worse - * for perf if some primitives are expected to fail depth test against the - * actual depth clear value. - * - * LRZ Precision - * ============= - * - * LRZ always uses Z16_UNORM. The epsilon for it is 1.f / (1 << 16) which is - * not enough to represent all values of Z32_UNORM or Z32_FLOAT. - * This especially rises questions in context of fast-clear, if fast-clear - * uses a value which cannot be precisely represented by LRZ - we wouldn't - * be able to round it in the correct direction since direction is tracked - * on GPU. - * - * However, it seems that depth comparisons with LRZ values have some "slack" - * and nothing special should be done for such depth clear values. - * - * How it was tested: - * - Clear Z32_FLOAT attachment to 1.f / (1 << 17) - * - LRZ buffer contains all zeroes - * - Do draws and check whether all samples are passing: - * - OP_GREATER with (1.f / (1 << 17) + float32_epsilon) - passing; - * - OP_GREATER with (1.f / (1 << 17) - float32_epsilon) - not passing; - * - OP_LESS with (1.f / (1 << 17) - float32_epsilon) - samples; - * - OP_LESS with() 1.f / (1 << 17) + float32_epsilon) - not passing; - * - OP_LESS_OR_EQ with (1.f / (1 << 17) + float32_epsilon) - not passing; - * In all cases resulting LRZ buffer is all zeroes and LRZ direction is updated. - * - * LRZ Caches - * ========== - * - * ! 
The policy here is to flush LRZ cache right after it is changed, - * so if LRZ data is needed afterwards - there is no need to flush it - * before using LRZ. - * - * LRZ_FLUSH flushes and invalidates LRZ caches, there are two caches: - * - Cache for fast-clear buffer; - * - Cache for direction byte + depth view params. - * They could be cleared by LRZ_CLEAR. To become visible in GPU memory - * the caches should be flushed with LRZ_FLUSH afterwards. - * - * GRAS_LRZ_CNTL reads from these caches. - */ - -static void -tu6_emit_lrz_buffer(struct tu_cs *cs, struct tu_image *depth_image) -{ - if (!depth_image) { - tu_cs_emit_regs(cs, - A6XX_GRAS_LRZ_BUFFER_BASE(0), - A6XX_GRAS_LRZ_BUFFER_PITCH(0), - A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(0)); - return; - } - - uint64_t lrz_iova = depth_image->iova + depth_image->lrz_offset; - uint64_t lrz_fc_iova = depth_image->iova + depth_image->lrz_fc_offset; - if (!depth_image->lrz_fc_offset) - lrz_fc_iova = 0; - - tu_cs_emit_regs(cs, - A6XX_GRAS_LRZ_BUFFER_BASE(.qword = lrz_iova), - A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = depth_image->lrz_pitch), - A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE(.qword = lrz_fc_iova)); -} - -static void -tu6_write_lrz_reg(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - struct tu_reg_value reg) -{ - if (cmd->device->physical_device->info->a6xx.lrz_track_quirk) { - tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); - tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(TRACK_LRZ)); - tu_cs_emit(cs, reg.reg); - tu_cs_emit(cs, reg.value); - } else { - tu_cs_emit_pkt4(cs, reg.reg, 1); - tu_cs_emit(cs, reg.value); - } -} - -static void -tu6_disable_lrz_via_depth_view(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - /* Disable direction by writing invalid depth view. */ - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_DEPTH_VIEW( - .base_layer = 0b11111111111, - .layer_count = 0b11111111111, - .base_mip_level = 0b1111, - )); - - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL( - .enable = true, - .disable_on_wrong_dir = true, - )); - - tu6_emit_event_write(cmd, cs, LRZ_CLEAR); - tu6_emit_event_write(cmd, cs, LRZ_FLUSH); -} - -static void -tu_lrz_init_state(struct tu_cmd_buffer *cmd, - const struct tu_render_pass_attachment *att, - const struct tu_image_view *view) -{ - if (!view->image->lrz_height) { - assert((cmd->device->instance->debug_flags & TU_DEBUG_NOLRZ) || - !vk_format_has_depth(att->format)); - return; - } - - bool clears_depth = att->clear_mask & - (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT); - bool has_gpu_tracking = - cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking; - - if (!has_gpu_tracking && !clears_depth) - return; - - /* We need to always have an LRZ view just to disable it if there is a - * depth attachment, there are any secondaries, and GPU tracking is - * enabled, in order not to rely on loadOp state which doesn't exist with - * dynamic rendering in secondaries. Otherwise the secondary will have LRZ - * enabled and there will be a NULL/garbage LRZ buffer. - */ - cmd->state.lrz.image_view = view; - - if (!clears_depth && !att->load) - return; - - cmd->state.lrz.valid = true; - cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN; - /* Be optimistic and unconditionally enable fast-clear in - * secondary cmdbufs and when reusing previous LRZ state. 
- */ - cmd->state.lrz.fast_clear = view->image->lrz_fc_size > 0; - - cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking; - cmd->state.lrz.reuse_previous_state = !clears_depth; -} - -/* Note: if we enable LRZ here, then tu_lrz_init_state() must at least set - * lrz.image_view, so that an LRZ buffer is present (even if LRZ is - * dynamically disabled). - */ - -static void -tu_lrz_init_secondary(struct tu_cmd_buffer *cmd, - const struct tu_render_pass_attachment *att) -{ - bool has_gpu_tracking = - cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking; - - if (!has_gpu_tracking) - return; - - if (cmd->device->instance->debug_flags & TU_DEBUG_NOLRZ) - return; - - if (!vk_format_has_depth(att->format)) - return; - - cmd->state.lrz.valid = true; - cmd->state.lrz.prev_direction = TU_LRZ_UNKNOWN; - cmd->state.lrz.gpu_dir_tracking = has_gpu_tracking; - - /* We may not have the depth attachment when executing in a secondary - * inside a render pass. This means we have to be even more optimistic than - * the normal case and enable fast clear even if the depth image doesn't - * support it. - */ - cmd->state.lrz.fast_clear = true; - - /* These are not used inside secondaries */ - cmd->state.lrz.image_view = NULL; - cmd->state.lrz.reuse_previous_state = false; -} - -/* This is generally the same as tu_lrz_begin_renderpass(), but we skip - * actually emitting anything. The lrz state needs to be consistent between - * renderpasses, but only the first should actually emit commands to disable - * lrz etc. - */ -void -tu_lrz_begin_resumed_renderpass(struct tu_cmd_buffer *cmd, - const VkClearValue *clear_values) -{ - /* Track LRZ valid state */ - memset(&cmd->state.lrz, 0, sizeof(cmd->state.lrz)); - - uint32_t a; - for (a = 0; a < cmd->state.pass->attachment_count; a++) { - if (cmd->state.attachments[a]->image->lrz_height) - break; - } - - if (a != cmd->state.pass->attachment_count) { - const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a]; - tu_lrz_init_state(cmd, att, cmd->state.attachments[a]); - if (att->clear_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) { - VkClearValue clear = clear_values[a]; - cmd->state.lrz.depth_clear_value = clear; - cmd->state.lrz.fast_clear = cmd->state.lrz.fast_clear && - (clear.depthStencil.depth == 0.f || - clear.depthStencil.depth == 1.f); - } - cmd->state.dirty |= TU_CMD_DIRTY_LRZ; - } -} - -void -tu_lrz_begin_renderpass(struct tu_cmd_buffer *cmd, - const VkClearValue *clear_values) -{ - const struct tu_render_pass *pass = cmd->state.pass; - - int lrz_img_count = 0; - for (unsigned i = 0; i < pass->attachment_count; i++) { - if (cmd->state.attachments[i]->image->lrz_height) - lrz_img_count++; - } - - if (cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking && - cmd->state.pass->subpass_count > 1 && lrz_img_count > 1) { - /* Theoretically we could switch between LRZ buffers during the binning - * and tiling passes, but it is untested and would add complexity for - * presumably extremely rare case. - */ - perf_debug(cmd->device, - "Invalidating LRZ because there are several subpasses with " - "different depth attachments in a single renderpass"); - - for (unsigned i = 0; i < pass->attachment_count; i++) { - struct tu_image *image = cmd->state.attachments[i]->image; - tu_disable_lrz(cmd, &cmd->cs, image); - } - - /* We need a valid LRZ fast-clear base, in case the render pass contents - * are in secondaries that enable LRZ, so that they can read that LRZ is - * dynamically disabled. 
It doesn't matter which we use, so just leave - * the last one as emitted in tu_disable_lrz(). - */ - memset(&cmd->state.lrz, 0, sizeof(cmd->state.lrz)); - return; - } - - /* Track LRZ valid state */ - tu_lrz_begin_resumed_renderpass(cmd, clear_values); - - if (!cmd->state.lrz.valid) { - tu6_emit_lrz_buffer(&cmd->cs, NULL); - } -} - -void -tu_lrz_begin_secondary_cmdbuf(struct tu_cmd_buffer *cmd) -{ - memset(&cmd->state.lrz, 0, sizeof(cmd->state.lrz)); - uint32_t a = cmd->state.subpass->depth_stencil_attachment.attachment; - if (a != VK_ATTACHMENT_UNUSED) { - const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a]; - tu_lrz_init_secondary(cmd, att); - } -} - -void -tu_lrz_tiling_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - /* TODO: If lrz was never valid for the entire renderpass, we could exit - * early here. Sometimes we know this ahead of time and null out - * image_view, but with LOAD_OP_DONT_CARE this only happens if there were - * no secondaries. - */ - if (!cmd->state.lrz.image_view) - return; - - struct tu_lrz_state *lrz = &cmd->state.lrz; - - tu6_emit_lrz_buffer(cs, lrz->image_view->image); - - if (lrz->reuse_previous_state) { - /* Reuse previous LRZ state, LRZ cache is assumed to be - * already invalidated by previous renderpass. - */ - assert(lrz->gpu_dir_tracking); - - tu6_write_lrz_reg(cmd, cs, - A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = lrz->image_view->view.GRAS_LRZ_DEPTH_VIEW)); - return; - } - - bool invalidate_lrz = !lrz->valid && lrz->gpu_dir_tracking; - if (invalidate_lrz) { - /* Following the blob we elect to disable LRZ for the whole renderpass - * if it is known that LRZ is disabled somewhere in the renderpass. - * - * This is accomplished by making later GRAS_LRZ_CNTL (in binning pass) - * to fail the comparison of depth views. - */ - tu6_disable_lrz_via_depth_view(cmd, cs); - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = 0)); - } else if (lrz->fast_clear || lrz->gpu_dir_tracking) { - if (lrz->gpu_dir_tracking) { - tu6_write_lrz_reg(cmd, cs, - A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = lrz->image_view->view.GRAS_LRZ_DEPTH_VIEW)); - } - - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL( - .enable = true, - .fc_enable = lrz->fast_clear, - .disable_on_wrong_dir = lrz->gpu_dir_tracking, - )); - - /* LRZ_CLEAR.fc_enable + LRZ_CLEAR - clears fast-clear buffer; - * LRZ_CLEAR.disable_on_wrong_dir + LRZ_CLEAR - sets direction to - * CUR_DIR_UNSET. - */ - tu6_emit_event_write(cmd, cs, LRZ_CLEAR); - } - - if (!lrz->fast_clear && !invalidate_lrz) { - tu6_clear_lrz(cmd, cs, lrz->image_view->image, &lrz->depth_clear_value); - - /* Even though we disable fast-clear we still have to dirty - * fast-clear buffer because both secondary cmdbufs and following - * renderpasses won't know that fast-clear is disabled. - * - * TODO: we could avoid this if we don't store depth and don't - * expect secondary cmdbufs. 
- */ - if (lrz->image_view->image->lrz_fc_size) { - tu6_dirty_lrz_fc(cmd, cs, lrz->image_view->image); - } - } -} - -void -tu_lrz_tiling_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - if (cmd->state.lrz.fast_clear || cmd->state.lrz.gpu_dir_tracking) { - tu6_emit_lrz_buffer(cs, cmd->state.lrz.image_view->image); - - if (cmd->state.lrz.gpu_dir_tracking) { - tu6_write_lrz_reg(cmd, &cmd->cs, - A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = cmd->state.lrz.image_view->view.GRAS_LRZ_DEPTH_VIEW)); - } - - /* Enable flushing of LRZ fast-clear and of direction buffer */ - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL( - .enable = true, - .fc_enable = cmd->state.lrz.fast_clear, - .disable_on_wrong_dir = cmd->state.lrz.gpu_dir_tracking, - )); - } else { - tu6_write_lrz_reg(cmd, cs, A6XX_GRAS_LRZ_CNTL(0)); - } - - tu6_emit_event_write(cmd, cs, LRZ_FLUSH); - - /* If gpu_dir_tracking is enabled and lrz is not valid blob, at this point, - * additionally clears direction buffer: - * GRAS_LRZ_DEPTH_VIEW(.dword = 0) - * GRAS_LRZ_DEPTH_VIEW(.dword = 0xffffffff) - * A6XX_GRAS_LRZ_CNTL(.enable = true, .disable_on_wrong_dir = true) - * LRZ_CLEAR - * LRZ_FLUSH - * Since it happens after all of the rendering is done there is no known - * reason to do such clear. - */ -} - -void -tu_lrz_sysmem_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - if (!cmd->state.lrz.image_view) - return; - - /* Actually, LRZ buffer could be filled in sysmem, in theory to - * be used in another renderpass, but the benefit is rather dubious. - */ - - struct tu_lrz_state *lrz = &cmd->state.lrz; - - if (cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking) { - tu_disable_lrz(cmd, cs, lrz->image_view->image); - /* Make sure depth view comparison will fail. */ - tu6_write_lrz_reg(cmd, cs, - A6XX_GRAS_LRZ_DEPTH_VIEW(.dword = 0)); - } else { - tu6_emit_lrz_buffer(cs, lrz->image_view->image); - /* Even though we disable LRZ writes in sysmem mode - there is still - * LRZ test, so LRZ should be cleared. - */ - if (lrz->fast_clear) { - tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_CNTL( - .enable = true, - .fc_enable = true, - )); - tu6_emit_event_write(cmd, &cmd->cs, LRZ_CLEAR); - tu6_emit_event_write(cmd, &cmd->cs, LRZ_FLUSH); - } else { - tu6_clear_lrz(cmd, cs, lrz->image_view->image, &lrz->depth_clear_value); - } - } -} - -void -tu_lrz_sysmem_end(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - tu6_emit_event_write(cmd, &cmd->cs, LRZ_FLUSH); -} - -/* Disable LRZ outside of renderpass. */ -void -tu_disable_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - struct tu_image *image) -{ - if (!cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking) - return; - - if (!image->lrz_height) - return; - - tu6_emit_lrz_buffer(cs, image); - tu6_disable_lrz_via_depth_view(cmd, cs); -} - -/* Clear LRZ, used for out of renderpass depth clears. */ -void -tu_lrz_clear_depth_image(struct tu_cmd_buffer *cmd, - struct tu_image *image, - const VkClearDepthStencilValue *pDepthStencil, - uint32_t rangeCount, - const VkImageSubresourceRange *pRanges) -{ - if (!rangeCount || !image->lrz_height || - !cmd->device->physical_device->info->a6xx.has_lrz_dir_tracking) - return; - - /* We cannot predict which depth subresource would be used later on, - * so we just pick the first one with depth cleared and clear the LRZ. 
- */ - const VkImageSubresourceRange *range = NULL; - for (unsigned i = 0; i < rangeCount; i++) { - if (pRanges[i].aspectMask & - (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) { - range = &pRanges[i]; - break; - } - } - - if (!range) - return; - - bool fast_clear = image->lrz_fc_size && (pDepthStencil->depth == 0.f || - pDepthStencil->depth == 1.f); - - tu6_emit_lrz_buffer(&cmd->cs, image); - - tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_DEPTH_VIEW( - .base_layer = range->baseArrayLayer, - .layer_count = vk_image_subresource_layer_count(&image->vk, range), - .base_mip_level = range->baseMipLevel, - )); - - tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_CNTL( - .enable = true, - .fc_enable = fast_clear, - .disable_on_wrong_dir = true, - )); - - tu6_emit_event_write(cmd, &cmd->cs, LRZ_CLEAR); - tu6_emit_event_write(cmd, &cmd->cs, LRZ_FLUSH); - - if (!fast_clear) { - tu6_clear_lrz(cmd, &cmd->cs, image, (const VkClearValue*) pDepthStencil); - } -} - -void -tu_lrz_disable_during_renderpass(struct tu_cmd_buffer *cmd) -{ - assert(cmd->state.pass); - - cmd->state.lrz.valid = false; - cmd->state.dirty |= TU_CMD_DIRTY_LRZ; - - if (cmd->state.lrz.gpu_dir_tracking) { - tu6_write_lrz_reg(cmd, &cmd->cs, A6XX_GRAS_LRZ_CNTL( - .enable = true, - .dir = LRZ_DIR_INVALID, - .disable_on_wrong_dir = true, - )); - } -} - -/* update lrz state based on stencil-test func: - * - * Conceptually the order of the pipeline is: - * - * - * FS -> Alpha-Test -> Stencil-Test -> Depth-Test - * | | - * if wrmask != 0 if wrmask != 0 - * | | - * v v - * Stencil-Write Depth-Write - * - * Because Stencil-Test can have side effects (Stencil-Write) prior - * to depth test, in this case we potentially need to disable early - * lrz-test. See: - * - * https://www.khronos.org/opengl/wiki/Per-Sample_Processing - */ -static bool -tu6_stencil_op_lrz_allowed(struct A6XX_GRAS_LRZ_CNTL *gras_lrz_cntl, - VkCompareOp func, - bool stencil_write) -{ - switch (func) { - case VK_COMPARE_OP_ALWAYS: - /* nothing to do for LRZ, but for stencil test when stencil- - * write is enabled, we need to disable lrz-test, since - * conceptually stencil test and write happens before depth-test. - */ - if (stencil_write) { - return false; - } - break; - case VK_COMPARE_OP_NEVER: - /* fragment never passes, disable lrz_write for this draw. */ - gras_lrz_cntl->lrz_write = false; - break; - default: - /* whether the fragment passes or not depends on result - * of stencil test, which we cannot know when doing binning - * pass. - */ - gras_lrz_cntl->lrz_write = false; - /* similarly to the VK_COMPARE_OP_ALWAYS case, if there are side- - * effects from stencil test we need to disable lrz-test. - */ - if (stencil_write) { - return false; - } - break; - } - - return true; -} - -static struct A6XX_GRAS_LRZ_CNTL -tu6_calculate_lrz_state(struct tu_cmd_buffer *cmd, - const uint32_t a) -{ - struct tu_pipeline *pipeline = cmd->state.pipeline; - bool z_test_enable = cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; - bool z_write_enable = cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; - bool z_bounds_enable = cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE; - VkCompareOp depth_compare_op = (cmd->state.rb_depth_cntl & A6XX_RB_DEPTH_CNTL_ZFUNC__MASK) >> A6XX_RB_DEPTH_CNTL_ZFUNC__SHIFT; - - struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = { 0 }; - - if (!cmd->state.lrz.valid) { - return gras_lrz_cntl; - } - - /* If depth test is disabled we shouldn't touch LRZ. - * Same if there is no depth attachment. 
- */ - if (a == VK_ATTACHMENT_UNUSED || !z_test_enable || - (cmd->device->instance->debug_flags & TU_DEBUG_NOLRZ)) - return gras_lrz_cntl; - - if (!cmd->state.lrz.gpu_dir_tracking && !cmd->state.attachments) { - /* Without on-gpu LRZ direction tracking - there is nothing we - * can do to enable LRZ in secondary command buffers. - */ - return gras_lrz_cntl; - } - - gras_lrz_cntl.enable = true; - gras_lrz_cntl.lrz_write = - z_write_enable && - !(pipeline->lrz.force_disable_mask & TU_LRZ_FORCE_DISABLE_WRITE); - gras_lrz_cntl.z_test_enable = z_write_enable; - gras_lrz_cntl.z_bounds_enable = z_bounds_enable; - gras_lrz_cntl.fc_enable = cmd->state.lrz.fast_clear; - gras_lrz_cntl.dir_write = cmd->state.lrz.gpu_dir_tracking; - gras_lrz_cntl.disable_on_wrong_dir = cmd->state.lrz.gpu_dir_tracking; - - /* See comment in tu_pipeline about disabling LRZ write for blending. */ - if ((cmd->state.pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_LOGIC_OP)) && - cmd->state.logic_op_enabled && cmd->state.rop_reads_dst) { - if (gras_lrz_cntl.lrz_write) - perf_debug(cmd->device, "disabling lrz write due to dynamic logic op"); - gras_lrz_cntl.lrz_write = false; - } - - if ((cmd->state.pipeline->dynamic_state_mask & - BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE)) && - (cmd->state.color_write_enable & - MASK(cmd->state.subpass->color_count)) != - MASK(cmd->state.pipeline->blend.num_rts)) { - if (gras_lrz_cntl.lrz_write) { - perf_debug( - cmd->device, - "disabling lrz write due to dynamic color write enables (%x/%x)", - cmd->state.color_write_enable, - MASK(cmd->state.pipeline->blend.num_rts)); - } - gras_lrz_cntl.lrz_write = false; - } - - /* LRZ is disabled until it is cleared, which means that one "wrong" - * depth test or shader could disable LRZ until depth buffer is cleared. - */ - bool disable_lrz = false; - bool temporary_disable_lrz = false; - - /* What happens in FS could affect LRZ, e.g.: writes to gl_FragDepth or early - * fragment tests. We have to skip LRZ testing and updating, but as long as - * the depth direction stayed the same we can continue with LRZ testing later. - */ - if (pipeline->lrz.force_disable_mask & TU_LRZ_FORCE_DISABLE_LRZ) { - if (cmd->state.lrz.prev_direction != TU_LRZ_UNKNOWN || !cmd->state.lrz.gpu_dir_tracking) { - perf_debug(cmd->device, "Skipping LRZ due to FS"); - temporary_disable_lrz = true; - } else { - perf_debug(cmd->device, "Disabling LRZ due to FS (TODO: fix for gpu-direction-tracking case"); - disable_lrz = true; - } - } - - /* If Z is not written - it doesn't affect LRZ buffer state. - * Which means two things: - * - Don't lock direction until Z is written for the first time; - * - If Z isn't written and direction IS locked it's possible to just - * temporary disable LRZ instead of fully bailing out, when direction - * is changed. - */ - - enum tu_lrz_direction lrz_direction = TU_LRZ_UNKNOWN; - switch (depth_compare_op) { - case VK_COMPARE_OP_ALWAYS: - case VK_COMPARE_OP_NOT_EQUAL: - /* OP_ALWAYS and OP_NOT_EQUAL could have depth value of any direction, - * so if there is a depth write - LRZ must be disabled. - */ - if (z_write_enable) { - perf_debug(cmd->device, "Invalidating LRZ due to ALWAYS/NOT_EQUAL"); - disable_lrz = true; - gras_lrz_cntl.dir = LRZ_DIR_INVALID; - } else { - perf_debug(cmd->device, "Skipping LRZ due to ALWAYS/NOT_EQUAL"); - temporary_disable_lrz = true; - } - break; - case VK_COMPARE_OP_EQUAL: - case VK_COMPARE_OP_NEVER: - /* Blob disables LRZ for OP_EQUAL, and from our empirical - * evidence it is a right thing to do. 
- * - * Both OP_EQUAL and OP_NEVER don't change LRZ buffer so - * we could just temporary disable LRZ. - */ - temporary_disable_lrz = true; - break; - case VK_COMPARE_OP_GREATER: - case VK_COMPARE_OP_GREATER_OR_EQUAL: - lrz_direction = TU_LRZ_GREATER; - gras_lrz_cntl.greater = true; - gras_lrz_cntl.dir = LRZ_DIR_GE; - break; - case VK_COMPARE_OP_LESS: - case VK_COMPARE_OP_LESS_OR_EQUAL: - lrz_direction = TU_LRZ_LESS; - gras_lrz_cntl.greater = false; - gras_lrz_cntl.dir = LRZ_DIR_LE; - break; - default: - unreachable("bad VK_COMPARE_OP value or uninitialized"); - break; - }; - - /* If depthfunc direction is changed, bail out on using LRZ. The - * LRZ buffer encodes a min/max depth value per block, but if - * we switch from GT/GE <-> LT/LE, those values cannot be - * interpreted properly. - */ - if (cmd->state.lrz.prev_direction != TU_LRZ_UNKNOWN && - lrz_direction != TU_LRZ_UNKNOWN && - cmd->state.lrz.prev_direction != lrz_direction) { - if (z_write_enable) { - perf_debug(cmd->device, "Invalidating LRZ due to direction change"); - disable_lrz = true; - } else { - perf_debug(cmd->device, "Skipping LRZ due to direction change"); - temporary_disable_lrz = true; - } - } - - /* Consider the following sequence of depthfunc changes: - * - * - COMPARE_OP_GREATER -> COMPARE_OP_EQUAL -> COMPARE_OP_GREATER - * LRZ is disabled during COMPARE_OP_EQUAL but could be enabled - * during second VK_COMPARE_OP_GREATER. - * - * - COMPARE_OP_GREATER -> COMPARE_OP_EQUAL -> COMPARE_OP_LESS - * Here, LRZ is disabled during COMPARE_OP_EQUAL and should become - * invalid during COMPARE_OP_LESS. - * - * This shows that we should keep last KNOWN direction. - */ - if (z_write_enable && lrz_direction != TU_LRZ_UNKNOWN) - cmd->state.lrz.prev_direction = lrz_direction; - - /* Invalidate LRZ and disable write if stencil test is enabled */ - bool stencil_test_enable = cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE; - if (!disable_lrz && stencil_test_enable) { - VkCompareOp stencil_front_compare_op = - (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FUNC__MASK) >> A6XX_RB_STENCIL_CONTROL_FUNC__SHIFT; - - VkCompareOp stencil_back_compare_op = - (cmd->state.rb_stencil_cntl & A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK) >> A6XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT; - - bool lrz_allowed = true; - lrz_allowed = lrz_allowed && tu6_stencil_op_lrz_allowed( - &gras_lrz_cntl, stencil_front_compare_op, - cmd->state.stencil_front_write); - - lrz_allowed = lrz_allowed && tu6_stencil_op_lrz_allowed( - &gras_lrz_cntl, stencil_back_compare_op, - cmd->state.stencil_back_write); - - /* Without depth write it's enough to make sure that depth test - * is executed after stencil test, so temporary disabling LRZ is enough. - */ - if (!lrz_allowed) { - if (z_write_enable) { - perf_debug(cmd->device, "Invalidating LRZ due to stencil write"); - disable_lrz = true; - } else { - perf_debug(cmd->device, "Skipping LRZ due to stencil write"); - temporary_disable_lrz = true; - } - } - } - - if (disable_lrz) - cmd->state.lrz.valid = false; - - if (disable_lrz && cmd->state.lrz.gpu_dir_tracking) { - /* Direction byte on GPU should be set to CUR_DIR_DISABLED, - * for this it's not enough to emit empty GRAS_LRZ_CNTL. 
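
An illustrative condensation of the depth-function handling above (helper and enum names are hypothetical; the deleted file folds this logic into tu6_calculate_lrz_state):

#include <stdbool.h>

/* Hedged sketch, not from the deleted file: the disable vs.
 * temporarily-disable split for LRZ, keyed off the depth compare op
 * and whether this draw writes depth.
 */
enum lrz_action {
   LRZ_KEEP,        /* LRZ test and write stay enabled            */
   LRZ_SKIP_DRAW,   /* temporarily disable for this draw only     */
   LRZ_INVALIDATE,  /* LRZ buffer can no longer be trusted        */
};

enum depth_func_class {
   DEPTH_FUNC_GREATER,     /* GREATER / GREATER_OR_EQUAL            */
   DEPTH_FUNC_LESS,        /* LESS / LESS_OR_EQUAL                  */
   DEPTH_FUNC_EQUAL_NEVER, /* EQUAL / NEVER: never changes Z        */
   DEPTH_FUNC_ANY_DIR,     /* ALWAYS / NOT_EQUAL: direction unknown */
};

static enum lrz_action
lrz_action_for_depth_func(enum depth_func_class func,
                          bool have_locked_dir,
                          enum depth_func_class locked_dir,
                          bool z_write_enable)
{
   switch (func) {
   case DEPTH_FUNC_EQUAL_NEVER:
      /* The depth buffer is left untouched, so LRZ only sits this draw out. */
      return LRZ_SKIP_DRAW;
   case DEPTH_FUNC_ANY_DIR:
      /* A depth write with ALWAYS/NOT_EQUAL can move values either way,
       * which the per-block min/max cannot describe.
       */
      return z_write_enable ? LRZ_INVALIDATE : LRZ_SKIP_DRAW;
   default:
      /* GT/GE or LT/LE: only a direction flip is a problem, and only
       * when this draw also writes depth.
       */
      if (have_locked_dir && locked_dir != func)
         return z_write_enable ? LRZ_INVALIDATE : LRZ_SKIP_DRAW;
      return LRZ_KEEP;
   }
}
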
- */ - gras_lrz_cntl.enable = true; - gras_lrz_cntl.dir = LRZ_DIR_INVALID; - - return gras_lrz_cntl; - } - - if (temporary_disable_lrz) - gras_lrz_cntl.enable = false; - - cmd->state.lrz.enabled = cmd->state.lrz.valid && gras_lrz_cntl.enable; - if (!cmd->state.lrz.enabled) - memset(&gras_lrz_cntl, 0, sizeof(gras_lrz_cntl)); - - return gras_lrz_cntl; -} - -void -tu6_emit_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs) -{ - const uint32_t a = cmd->state.subpass->depth_stencil_attachment.attachment; - struct A6XX_GRAS_LRZ_CNTL gras_lrz_cntl = tu6_calculate_lrz_state(cmd, a); - - tu6_write_lrz_reg(cmd, cs, pack_A6XX_GRAS_LRZ_CNTL(gras_lrz_cntl)); - tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(.enable = gras_lrz_cntl.enable)); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_nir_lower_multiview.c b/lib/mesa/src/freedreno/vulkan/tu_nir_lower_multiview.c deleted file mode 100644 index b039afdde..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_nir_lower_multiview.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright © 2020 Valve Corporation - * SPDX-License-Identifier: MIT - */ - -#include "tu_private.h" -#include "nir_builder.h" - -/* Some a6xx variants cannot support a non-contiguous multiview mask. Instead, - * inside the shader something like this needs to be inserted: - * - * gl_Position = ((1ull << gl_ViewIndex) & view_mask) ? gl_Position : vec4(0.); - * - * Scan backwards until we find the gl_Position write (there should only be - * one). - */ -static bool -lower_multiview_mask(nir_shader *nir, uint32_t *mask) -{ - nir_function_impl *impl = nir_shader_get_entrypoint(nir); - - if (util_is_power_of_two_or_zero(*mask + 1)) { - nir_metadata_preserve(impl, nir_metadata_all); - return false; - } - - nir_builder b; - nir_builder_init(&b, impl); - - uint32_t old_mask = *mask; - *mask = BIT(util_logbase2(old_mask) + 1) - 1; - - nir_foreach_block_reverse(block, impl) { - nir_foreach_instr_reverse(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_store_deref) - continue; - - nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); - if (!nir_deref_mode_is(deref, nir_var_shader_out)) - continue; - - nir_variable *var = nir_deref_instr_get_variable(deref); - if (var->data.location != VARYING_SLOT_POS) - continue; - - assert(intrin->src[1].is_ssa); - nir_ssa_def *orig_src = intrin->src[1].ssa; - b.cursor = nir_before_instr(instr); - - /* ((1ull << gl_ViewIndex) & mask) != 0 */ - nir_ssa_def *cmp = - nir_i2b(&b, nir_iand(&b, nir_imm_int(&b, old_mask), - nir_ishl(&b, nir_imm_int(&b, 1), - nir_load_view_index(&b)))); - - nir_ssa_def *src = nir_bcsel(&b, cmp, orig_src, nir_imm_float(&b, 0.)); - nir_instr_rewrite_src(instr, &intrin->src[1], nir_src_for_ssa(src)); - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - return true; - } - } - - nir_metadata_preserve(impl, nir_metadata_all); - return false; -} - -bool -tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output, - struct tu_device *dev) -{ - *multi_pos_output = false; - - bool progress = false; - - if (!dev->physical_device->info.a6xx.supports_multiview_mask) - NIR_PASS(progress, nir, lower_multiview_mask, &mask); - - unsigned num_views = util_logbase2(mask) + 1; - - /* Blob doesn't apply multipos optimization starting from 11 views - * even on a650, however in practice, with the limit of 16 views, - * tests pass on a640/a650 and fail on a630. 
- */ - unsigned max_views_for_multipos = - dev->physical_device->info.a6xx.supports_multiview_mask ? 16 : 10; - - /* Speculatively assign output locations so that we know num_outputs. We - * will assign output locations for real after this pass. - */ - unsigned num_outputs; - nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs, MESA_SHADER_VERTEX); - - /* In addition to the generic checks done by NIR, check that we don't - * overflow VPC with the extra copies of gl_Position. - */ - if (likely(!(dev->physical_device->instance->debug_flags & TU_DEBUG_NOMULTIPOS)) && - num_views <= max_views_for_multipos && num_outputs + (num_views - 1) <= 32 && - nir_can_lower_multiview(nir)) { - *multi_pos_output = true; - - /* It appears that the multiview mask is ignored when multi-position - * output is enabled, so we have to write 0 to inactive views ourselves. - */ - NIR_PASS(progress, nir, lower_multiview_mask, &mask); - - NIR_PASS_V(nir, nir_lower_multiview, mask); - progress = true; - } - - return progress; -} - diff --git a/lib/mesa/src/freedreno/vulkan/tu_pass.c b/lib/mesa/src/freedreno/vulkan/tu_pass.c deleted file mode 100644 index e3d9f23df..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_pass.c +++ /dev/null @@ -1,416 +0,0 @@ -/* - * Copyright © 2016 Red Hat. - * Copyright © 2016 Bas Nieuwenhuizen - * - * based in part on anv driver which is: - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
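
The view-mask handling in lower_multiview_mask() above can be restated with plain bit operations; the helper names below are invented (the driver uses util_is_power_of_two_or_zero() and util_logbase2()):

#include <stdbool.h>
#include <stdint.h>

/* Hedged sketch, not from the deleted file. */
static bool
view_mask_is_contiguous(uint32_t mask)
{
   /* Masks of the form 0b0...0111 (bits 0..n-1 set) need no lowering. */
   return ((mask + 1) & mask) == 0;
}

static uint32_t
view_mask_round_up(uint32_t mask)
{
   /* Cover every set bit with a contiguous mask starting at view 0;
    * the shader then writes vec4(0.0) for the views that were not in
    * the original mask.
    */
   uint32_t rounded = 1;
   while (rounded <= mask)
      rounded <<= 1;
   return rounded - 1;
}

/* Examples:
 *   0b0111 -> contiguous, mask used as-is
 *   0b1011 -> rounded to 0b1111, view 2 masked out in the shader
 *   0b0110 -> rounded to 0b0111, view 0 masked out in the shader
 */
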
- */ -#include "tu_private.h" - -#include "vk_util.h" - -VkResult -tu_CreateRenderPass(VkDevice _device, - const VkRenderPassCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkRenderPass *pRenderPass) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_render_pass *pass; - size_t size; - size_t attachments_offset; - VkRenderPassMultiviewCreateInfo *multiview_info = NULL; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO); - - size = sizeof(*pass); - size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); - attachments_offset = size; - size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); - - pass = vk_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - memset(pass, 0, size); - pass->attachment_count = pCreateInfo->attachmentCount; - pass->subpass_count = pCreateInfo->subpassCount; - pass->attachments = (void *) pass + attachments_offset; - - vk_foreach_struct(ext, pCreateInfo->pNext) - { - switch (ext->sType) { - case VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO: - multiview_info = (VkRenderPassMultiviewCreateInfo *) ext; - break; - default: - break; - } - } - - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - struct tu_render_pass_attachment *att = &pass->attachments[i]; - - att->format = pCreateInfo->pAttachments[i].format; - att->samples = pCreateInfo->pAttachments[i].samples; - att->load_op = pCreateInfo->pAttachments[i].loadOp; - att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; - att->final_layout = pCreateInfo->pAttachments[i].finalLayout; - // att->store_op = pCreateInfo->pAttachments[i].storeOp; - // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - } - uint32_t subpass_attachment_count = 0; - struct tu_subpass_attachment *p; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - - subpass_attachment_count += - desc->inputAttachmentCount + desc->colorAttachmentCount + - (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + - (desc->pDepthStencilAttachment != NULL); - } - - if (subpass_attachment_count) { - pass->subpass_attachments = vk_alloc2( - &device->alloc, pAllocator, - subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass->subpass_attachments == NULL) { - vk_free2(&device->alloc, pAllocator, pass); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - } - } else - pass->subpass_attachments = NULL; - - p = pass->subpass_attachments; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i]; - uint32_t color_sample_count = 1, depth_sample_count = 1; - struct tu_subpass *subpass = &pass->subpasses[i]; - - subpass->input_count = desc->inputAttachmentCount; - subpass->color_count = desc->colorAttachmentCount; - if (multiview_info) - subpass->view_mask = multiview_info->pViewMasks[i]; - - if (desc->inputAttachmentCount > 0) { - subpass->input_attachments = p; - p += desc->inputAttachmentCount; - - for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - subpass->input_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pInputAttachments[j].attachment, - .layout = desc->pInputAttachments[j].layout, - }; - if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) - pass->attachments[desc->pInputAttachments[j].attachment] - .view_mask |= subpass->view_mask; - } - } - - if (desc->colorAttachmentCount > 0) { - subpass->color_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->color_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pColorAttachments[j].attachment, - .layout = desc->pColorAttachments[j].layout, - }; - if (desc->pColorAttachments[j].attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pColorAttachments[j].attachment] - .view_mask |= subpass->view_mask; - color_sample_count = - pCreateInfo - ->pAttachments[desc->pColorAttachments[j].attachment] - .samples; - } - } - } - - subpass->has_resolve = false; - if (desc->pResolveAttachments) { - subpass->resolve_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - uint32_t a = desc->pResolveAttachments[j].attachment; - subpass->resolve_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pResolveAttachments[j].attachment, - .layout = desc->pResolveAttachments[j].layout, - }; - if (a != VK_ATTACHMENT_UNUSED) { - subpass->has_resolve = true; - pass->attachments[desc->pResolveAttachments[j].attachment] - .view_mask |= subpass->view_mask; - } - } - } - - if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = (struct tu_subpass_attachment) { - .attachment = desc->pDepthStencilAttachment->attachment, - .layout = desc->pDepthStencilAttachment->layout, - }; - if (desc->pDepthStencilAttachment->attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pDepthStencilAttachment->attachment] - .view_mask |= subpass->view_mask; - depth_sample_count = - pCreateInfo - ->pAttachments[desc->pDepthStencilAttachment->attachment] - .samples; - } - } else { - subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; - } - - subpass->max_sample_count = - MAX2(color_sample_count, depth_sample_count); - } - - for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { - uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; - if (dst == 
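
The attachment bookkeeping above follows a count-then-carve pattern: every per-subpass attachment is counted first, one block is allocated, and slices of it are handed out while the subpasses are filled. A simplified sketch with stand-in types (not the tu_* structures):

#include <stdlib.h>
#include <stdint.h>

struct slot { uint32_t attachment, layout; };

struct demo_subpass {
   uint32_t input_count, color_count;
   struct slot *inputs, *colors;
};

/* Hedged sketch, not from the deleted file. */
static struct slot *
carve_subpass_slots(struct demo_subpass *subpasses, uint32_t count)
{
   uint32_t total = 0;
   for (uint32_t i = 0; i < count; i++)
      total += subpasses[i].input_count + subpasses[i].color_count;

   struct slot *pool = total ? calloc(total, sizeof(*pool)) : NULL;
   struct slot *p = pool;

   for (uint32_t i = 0; i < count && pool; i++) {
      subpasses[i].inputs = p;  p += subpasses[i].input_count;
      subpasses[i].colors = p;  p += subpasses[i].color_count;
   }
   return pool;   /* freed in one go, mirroring vk_free2() on the pass */
}
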
VK_SUBPASS_EXTERNAL) { - pass->end_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->end_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->end_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; - } else { - pass->subpasses[dst].start_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->subpasses[dst].start_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->subpasses[dst].start_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; - } - } - - *pRenderPass = tu_render_pass_to_handle(pass); - - return VK_SUCCESS; -} - -VkResult -tu_CreateRenderPass2KHR(VkDevice _device, - const VkRenderPassCreateInfo2KHR *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkRenderPass *pRenderPass) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_render_pass *pass; - size_t size; - size_t attachments_offset; - - assert(pCreateInfo->sType == - VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR); - - size = sizeof(*pass); - size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]); - attachments_offset = size; - size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]); - - pass = vk_alloc2(&device->alloc, pAllocator, size, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - memset(pass, 0, size); - pass->attachment_count = pCreateInfo->attachmentCount; - pass->subpass_count = pCreateInfo->subpassCount; - pass->attachments = (void *) pass + attachments_offset; - - for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { - struct tu_render_pass_attachment *att = &pass->attachments[i]; - - att->format = pCreateInfo->pAttachments[i].format; - att->samples = pCreateInfo->pAttachments[i].samples; - att->load_op = pCreateInfo->pAttachments[i].loadOp; - att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp; - att->initial_layout = pCreateInfo->pAttachments[i].initialLayout; - att->final_layout = pCreateInfo->pAttachments[i].finalLayout; - // att->store_op = pCreateInfo->pAttachments[i].storeOp; - // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp; - } - uint32_t subpass_attachment_count = 0; - struct tu_subpass_attachment *p; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i]; - - subpass_attachment_count += - desc->inputAttachmentCount + desc->colorAttachmentCount + - (desc->pResolveAttachments ? 
desc->colorAttachmentCount : 0) + - (desc->pDepthStencilAttachment != NULL); - } - - if (subpass_attachment_count) { - pass->subpass_attachments = vk_alloc2( - &device->alloc, pAllocator, - subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pass->subpass_attachments == NULL) { - vk_free2(&device->alloc, pAllocator, pass); - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - } - } else - pass->subpass_attachments = NULL; - - p = pass->subpass_attachments; - for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) { - const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i]; - uint32_t color_sample_count = 1, depth_sample_count = 1; - struct tu_subpass *subpass = &pass->subpasses[i]; - - subpass->input_count = desc->inputAttachmentCount; - subpass->color_count = desc->colorAttachmentCount; - subpass->view_mask = desc->viewMask; - - if (desc->inputAttachmentCount > 0) { - subpass->input_attachments = p; - p += desc->inputAttachmentCount; - - for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) { - subpass->input_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pInputAttachments[j].attachment, - .layout = desc->pInputAttachments[j].layout, - }; - if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED) - pass->attachments[desc->pInputAttachments[j].attachment] - .view_mask |= subpass->view_mask; - } - } - - if (desc->colorAttachmentCount > 0) { - subpass->color_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - subpass->color_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pColorAttachments[j].attachment, - .layout = desc->pColorAttachments[j].layout, - }; - if (desc->pColorAttachments[j].attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pColorAttachments[j].attachment] - .view_mask |= subpass->view_mask; - color_sample_count = - pCreateInfo - ->pAttachments[desc->pColorAttachments[j].attachment] - .samples; - } - } - } - - subpass->has_resolve = false; - if (desc->pResolveAttachments) { - subpass->resolve_attachments = p; - p += desc->colorAttachmentCount; - - for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) { - uint32_t a = desc->pResolveAttachments[j].attachment; - subpass->resolve_attachments[j] = (struct tu_subpass_attachment) { - .attachment = desc->pResolveAttachments[j].attachment, - .layout = desc->pResolveAttachments[j].layout, - }; - if (a != VK_ATTACHMENT_UNUSED) { - subpass->has_resolve = true; - pass->attachments[desc->pResolveAttachments[j].attachment] - .view_mask |= subpass->view_mask; - } - } - } - - if (desc->pDepthStencilAttachment) { - subpass->depth_stencil_attachment = (struct tu_subpass_attachment) { - .attachment = desc->pDepthStencilAttachment->attachment, - .layout = desc->pDepthStencilAttachment->layout, - }; - if (desc->pDepthStencilAttachment->attachment != - VK_ATTACHMENT_UNUSED) { - pass->attachments[desc->pDepthStencilAttachment->attachment] - .view_mask |= subpass->view_mask; - depth_sample_count = - pCreateInfo - ->pAttachments[desc->pDepthStencilAttachment->attachment] - .samples; - } - } else { - subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED; - } - - subpass->max_sample_count = - MAX2(color_sample_count, depth_sample_count); - } - - for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { - uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass; - if (dst == VK_SUBPASS_EXTERNAL) { - 
pass->end_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->end_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->end_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; - } else { - pass->subpasses[dst].start_barrier.src_stage_mask = - pCreateInfo->pDependencies[i].srcStageMask; - pass->subpasses[dst].start_barrier.src_access_mask = - pCreateInfo->pDependencies[i].srcAccessMask; - pass->subpasses[dst].start_barrier.dst_access_mask = - pCreateInfo->pDependencies[i].dstAccessMask; - } - } - - *pRenderPass = tu_render_pass_to_handle(pass); - - return VK_SUCCESS; -} - -void -tu_DestroyRenderPass(VkDevice _device, - VkRenderPass _pass, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_render_pass, pass, _pass); - - if (!_pass) - return; - vk_free2(&device->alloc, pAllocator, pass->subpass_attachments); - vk_free2(&device->alloc, pAllocator, pass); -} - -void -tu_GetRenderAreaGranularity(VkDevice _device, - VkRenderPass renderPass, - VkExtent2D *pGranularity) -{ - TU_FROM_HANDLE(tu_device, device, _device); - - pGranularity->width = device->physical_device->tile_align_w; - pGranularity->height = device->physical_device->tile_align_h; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c b/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c deleted file mode 100644 index 7d13678c6..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_perfetto_util.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright © 2021 Igalia S.L. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "tu_private.h" -#include "tu_perfetto.h" - -/* Including tu_private.h in tu_perfetto.cc doesn't work, so - * we need some helper methods to access tu_device. - */ - -struct tu_perfetto_state * -tu_device_get_perfetto_state(struct tu_device *dev) -{ - return &dev->perfetto; -} - -int -tu_device_get_timestamp(struct tu_device *dev, - uint64_t *ts) -{ - return tu_drm_get_timestamp(dev->physical_device, ts); -} - -uint32_t -tu_u_trace_flush_data_get_submit_id(const struct tu_u_trace_flush_data *data) -{ - return data->submission_id; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_pipeline.c b/lib/mesa/src/freedreno/vulkan/tu_pipeline.c deleted file mode 100644 index 9964020a8..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_pipeline.c +++ /dev/null @@ -1,1896 +0,0 @@ -/* - * Copyright © 2016 Red Hat. 
- * Copyright © 2016 Bas Nieuwenhuizen - * - * based in part on anv driver which is: - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include "main/menums.h" -#include "nir/nir.h" -#include "nir/nir_builder.h" -#include "spirv/nir_spirv.h" -#include "util/debug.h" -#include "util/mesa-sha1.h" -#include "util/u_atomic.h" -#include "vk_format.h" -#include "vk_util.h" - -#include "tu_cs.h" - -struct tu_pipeline_builder -{ - struct tu_device *device; - struct tu_pipeline_cache *cache; - const VkAllocationCallbacks *alloc; - const VkGraphicsPipelineCreateInfo *create_info; - - struct tu_shader *shaders[MESA_SHADER_STAGES]; - uint32_t shader_offsets[MESA_SHADER_STAGES]; - uint32_t binning_vs_offset; - uint32_t shader_total_size; - - bool rasterizer_discard; - /* these states are affectd by rasterizer_discard */ - VkSampleCountFlagBits samples; - bool use_depth_stencil_attachment; - bool use_color_attachments; - uint32_t color_attachment_count; - VkFormat color_attachment_formats[MAX_RTS]; -}; - -static enum tu_dynamic_state_bits -tu_dynamic_state_bit(VkDynamicState state) -{ - switch (state) { - case VK_DYNAMIC_STATE_VIEWPORT: - return TU_DYNAMIC_VIEWPORT; - case VK_DYNAMIC_STATE_SCISSOR: - return TU_DYNAMIC_SCISSOR; - case VK_DYNAMIC_STATE_LINE_WIDTH: - return TU_DYNAMIC_LINE_WIDTH; - case VK_DYNAMIC_STATE_DEPTH_BIAS: - return TU_DYNAMIC_DEPTH_BIAS; - case VK_DYNAMIC_STATE_BLEND_CONSTANTS: - return TU_DYNAMIC_BLEND_CONSTANTS; - case VK_DYNAMIC_STATE_DEPTH_BOUNDS: - return TU_DYNAMIC_DEPTH_BOUNDS; - case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: - return TU_DYNAMIC_STENCIL_COMPARE_MASK; - case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: - return TU_DYNAMIC_STENCIL_WRITE_MASK; - case VK_DYNAMIC_STATE_STENCIL_REFERENCE: - return TU_DYNAMIC_STENCIL_REFERENCE; - default: - unreachable("invalid dynamic state"); - return 0; - } -} - -static gl_shader_stage -tu_shader_stage(VkShaderStageFlagBits stage) -{ - switch (stage) { - case VK_SHADER_STAGE_VERTEX_BIT: - return MESA_SHADER_VERTEX; - case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: - return MESA_SHADER_TESS_CTRL; - case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: - return MESA_SHADER_TESS_EVAL; - case VK_SHADER_STAGE_GEOMETRY_BIT: - return MESA_SHADER_GEOMETRY; - case VK_SHADER_STAGE_FRAGMENT_BIT: - return MESA_SHADER_FRAGMENT; - case VK_SHADER_STAGE_COMPUTE_BIT: - return MESA_SHADER_COMPUTE; - default: - unreachable("invalid 
VkShaderStageFlagBits"); - return MESA_SHADER_NONE; - } -} - -static const VkVertexInputAttributeDescription * -tu_find_vertex_input_attribute( - const VkPipelineVertexInputStateCreateInfo *vi_info, uint32_t slot) -{ - assert(slot >= VERT_ATTRIB_GENERIC0); - slot -= VERT_ATTRIB_GENERIC0; - for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { - if (vi_info->pVertexAttributeDescriptions[i].location == slot) - return &vi_info->pVertexAttributeDescriptions[i]; - } - return NULL; -} - -static const VkVertexInputBindingDescription * -tu_find_vertex_input_binding( - const VkPipelineVertexInputStateCreateInfo *vi_info, - const VkVertexInputAttributeDescription *vi_attr) -{ - assert(vi_attr); - for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { - if (vi_info->pVertexBindingDescriptions[i].binding == vi_attr->binding) - return &vi_info->pVertexBindingDescriptions[i]; - } - return NULL; -} - -static bool -tu_logic_op_reads_dst(VkLogicOp op) -{ - switch (op) { - case VK_LOGIC_OP_CLEAR: - case VK_LOGIC_OP_COPY: - case VK_LOGIC_OP_COPY_INVERTED: - case VK_LOGIC_OP_SET: - return false; - default: - return true; - } -} - -static VkBlendFactor -tu_blend_factor_no_dst_alpha(VkBlendFactor factor) -{ - /* treat dst alpha as 1.0 and avoid reading it */ - switch (factor) { - case VK_BLEND_FACTOR_DST_ALPHA: - return VK_BLEND_FACTOR_ONE; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - return VK_BLEND_FACTOR_ZERO; - default: - return factor; - } -} - -static enum pc_di_primtype -tu6_primtype(VkPrimitiveTopology topology) -{ - switch (topology) { - case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: - return DI_PT_POINTLIST; - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: - return DI_PT_LINELIST; - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: - return DI_PT_LINESTRIP; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: - return DI_PT_TRILIST; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: - return DI_PT_TRILIST; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: - return DI_PT_TRIFAN; - case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: - return DI_PT_LINE_ADJ; - case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: - return DI_PT_LINESTRIP_ADJ; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: - return DI_PT_TRI_ADJ; - case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: - return DI_PT_TRISTRIP_ADJ; - case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: - default: - unreachable("invalid primitive topology"); - return DI_PT_NONE; - } -} - -static enum adreno_compare_func -tu6_compare_func(VkCompareOp op) -{ - switch (op) { - case VK_COMPARE_OP_NEVER: - return FUNC_NEVER; - case VK_COMPARE_OP_LESS: - return FUNC_LESS; - case VK_COMPARE_OP_EQUAL: - return FUNC_EQUAL; - case VK_COMPARE_OP_LESS_OR_EQUAL: - return FUNC_LEQUAL; - case VK_COMPARE_OP_GREATER: - return FUNC_GREATER; - case VK_COMPARE_OP_NOT_EQUAL: - return FUNC_NOTEQUAL; - case VK_COMPARE_OP_GREATER_OR_EQUAL: - return FUNC_GEQUAL; - case VK_COMPARE_OP_ALWAYS: - return FUNC_ALWAYS; - default: - unreachable("invalid VkCompareOp"); - return FUNC_NEVER; - } -} - -static enum adreno_stencil_op -tu6_stencil_op(VkStencilOp op) -{ - switch (op) { - case VK_STENCIL_OP_KEEP: - return STENCIL_KEEP; - case VK_STENCIL_OP_ZERO: - return STENCIL_ZERO; - case VK_STENCIL_OP_REPLACE: - return STENCIL_REPLACE; - case VK_STENCIL_OP_INCREMENT_AND_CLAMP: - return STENCIL_INCR_CLAMP; - case VK_STENCIL_OP_DECREMENT_AND_CLAMP: - return STENCIL_DECR_CLAMP; - case VK_STENCIL_OP_INVERT: - return STENCIL_INVERT; - case VK_STENCIL_OP_INCREMENT_AND_WRAP: - return STENCIL_INCR_WRAP; - case 
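
The dst-alpha remap above matters when the colour attachment has no alpha channel, so destination alpha behaves as 1.0. A small sketch with illustrative enum values (stand-ins for VkBlendFactor, not the driver's types):

/* Hedged sketch, not from the deleted file. */
enum demo_factor { F_ZERO, F_ONE, F_DST_ALPHA, F_ONE_MINUS_DST_ALPHA, F_SRC_ALPHA };

static enum demo_factor
factor_no_dst_alpha(enum demo_factor f)
{
   switch (f) {
   case F_DST_ALPHA:           return F_ONE;   /* dst.a == 1.0        */
   case F_ONE_MINUS_DST_ALPHA: return F_ZERO;  /* 1.0 - dst.a == 0.0  */
   default:                    return f;       /* nothing reads dst.a */
   }
}

/* e.g. src=SRC_ALPHA, dst=ONE_MINUS_DST_ALPHA on an RGBX target becomes
 * src=SRC_ALPHA, dst=ZERO, so blending never samples the missing alpha.
 */
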
VK_STENCIL_OP_DECREMENT_AND_WRAP: - return STENCIL_DECR_WRAP; - default: - unreachable("invalid VkStencilOp"); - return STENCIL_KEEP; - } -} - -static enum a3xx_rop_code -tu6_rop(VkLogicOp op) -{ - switch (op) { - case VK_LOGIC_OP_CLEAR: - return ROP_CLEAR; - case VK_LOGIC_OP_AND: - return ROP_AND; - case VK_LOGIC_OP_AND_REVERSE: - return ROP_AND_REVERSE; - case VK_LOGIC_OP_COPY: - return ROP_COPY; - case VK_LOGIC_OP_AND_INVERTED: - return ROP_AND_INVERTED; - case VK_LOGIC_OP_NO_OP: - return ROP_NOOP; - case VK_LOGIC_OP_XOR: - return ROP_XOR; - case VK_LOGIC_OP_OR: - return ROP_OR; - case VK_LOGIC_OP_NOR: - return ROP_NOR; - case VK_LOGIC_OP_EQUIVALENT: - return ROP_EQUIV; - case VK_LOGIC_OP_INVERT: - return ROP_INVERT; - case VK_LOGIC_OP_OR_REVERSE: - return ROP_OR_REVERSE; - case VK_LOGIC_OP_COPY_INVERTED: - return ROP_COPY_INVERTED; - case VK_LOGIC_OP_OR_INVERTED: - return ROP_OR_INVERTED; - case VK_LOGIC_OP_NAND: - return ROP_NAND; - case VK_LOGIC_OP_SET: - return ROP_SET; - default: - unreachable("invalid VkLogicOp"); - return ROP_NOOP; - } -} - -static enum adreno_rb_blend_factor -tu6_blend_factor(VkBlendFactor factor) -{ - switch (factor) { - case VK_BLEND_FACTOR_ZERO: - return FACTOR_ZERO; - case VK_BLEND_FACTOR_ONE: - return FACTOR_ONE; - case VK_BLEND_FACTOR_SRC_COLOR: - return FACTOR_SRC_COLOR; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: - return FACTOR_ONE_MINUS_SRC_COLOR; - case VK_BLEND_FACTOR_DST_COLOR: - return FACTOR_DST_COLOR; - case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: - return FACTOR_ONE_MINUS_DST_COLOR; - case VK_BLEND_FACTOR_SRC_ALPHA: - return FACTOR_SRC_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: - return FACTOR_ONE_MINUS_SRC_ALPHA; - case VK_BLEND_FACTOR_DST_ALPHA: - return FACTOR_DST_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: - return FACTOR_ONE_MINUS_DST_ALPHA; - case VK_BLEND_FACTOR_CONSTANT_COLOR: - return FACTOR_CONSTANT_COLOR; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: - return FACTOR_ONE_MINUS_CONSTANT_COLOR; - case VK_BLEND_FACTOR_CONSTANT_ALPHA: - return FACTOR_CONSTANT_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: - return FACTOR_ONE_MINUS_CONSTANT_ALPHA; - case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: - return FACTOR_SRC_ALPHA_SATURATE; - case VK_BLEND_FACTOR_SRC1_COLOR: - return FACTOR_SRC1_COLOR; - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: - return FACTOR_ONE_MINUS_SRC1_COLOR; - case VK_BLEND_FACTOR_SRC1_ALPHA: - return FACTOR_SRC1_ALPHA; - case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: - return FACTOR_ONE_MINUS_SRC1_ALPHA; - default: - unreachable("invalid VkBlendFactor"); - return FACTOR_ZERO; - } -} - -static enum a3xx_rb_blend_opcode -tu6_blend_op(VkBlendOp op) -{ - switch (op) { - case VK_BLEND_OP_ADD: - return BLEND_DST_PLUS_SRC; - case VK_BLEND_OP_SUBTRACT: - return BLEND_SRC_MINUS_DST; - case VK_BLEND_OP_REVERSE_SUBTRACT: - return BLEND_DST_MINUS_SRC; - case VK_BLEND_OP_MIN: - return BLEND_MIN_DST_SRC; - case VK_BLEND_OP_MAX: - return BLEND_MAX_DST_SRC; - default: - unreachable("invalid VkBlendOp"); - return BLEND_DST_PLUS_SRC; - } -} - -static void -tu6_emit_vs_config(struct tu_cs *cs, const struct ir3_shader_variant *vs) -{ - uint32_t sp_vs_ctrl = - A6XX_SP_VS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | - A6XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vs->info.max_reg + 1) | - A6XX_SP_VS_CTRL_REG0_MERGEDREGS | - A6XX_SP_VS_CTRL_REG0_BRANCHSTACK(vs->branchstack); - if (vs->num_samp) - sp_vs_ctrl |= A6XX_SP_VS_CTRL_REG0_PIXLODENABLE; - - uint32_t sp_vs_config = A6XX_SP_VS_CONFIG_NTEX(vs->num_samp) | - A6XX_SP_VS_CONFIG_NSAMP(vs->num_samp); 
- if (vs->instrlen) - sp_vs_config |= A6XX_SP_VS_CONFIG_ENABLED; - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_CTRL_REG0, 1); - tu_cs_emit(cs, sp_vs_ctrl); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_CONFIG, 2); - tu_cs_emit(cs, sp_vs_config); - tu_cs_emit(cs, vs->instrlen); - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_VS_CNTL, 1); - tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(vs->constlen, 4)) | 0x100); -} - -static void -tu6_emit_hs_config(struct tu_cs *cs, const struct ir3_shader_variant *hs) -{ - uint32_t sp_hs_config = 0; - if (hs->instrlen) - sp_hs_config |= A6XX_SP_HS_CONFIG_ENABLED; - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1); - tu_cs_emit(cs, 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_CONFIG, 2); - tu_cs_emit(cs, sp_hs_config); - tu_cs_emit(cs, hs->instrlen); - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_HS_CNTL, 1); - tu_cs_emit(cs, A6XX_HLSQ_HS_CNTL_CONSTLEN(align(hs->constlen, 4))); -} - -static void -tu6_emit_ds_config(struct tu_cs *cs, const struct ir3_shader_variant *ds) -{ - uint32_t sp_ds_config = 0; - if (ds->instrlen) - sp_ds_config |= A6XX_SP_DS_CONFIG_ENABLED; - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_DS_CONFIG, 2); - tu_cs_emit(cs, sp_ds_config); - tu_cs_emit(cs, ds->instrlen); - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_DS_CNTL, 1); - tu_cs_emit(cs, A6XX_HLSQ_DS_CNTL_CONSTLEN(align(ds->constlen, 4))); -} - -static void -tu6_emit_gs_config(struct tu_cs *cs, const struct ir3_shader_variant *gs) -{ - uint32_t sp_gs_config = 0; - if (gs->instrlen) - sp_gs_config |= A6XX_SP_GS_CONFIG_ENABLED; - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1); - tu_cs_emit(cs, 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CONFIG, 2); - tu_cs_emit(cs, sp_gs_config); - tu_cs_emit(cs, gs->instrlen); - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_GS_CNTL, 1); - tu_cs_emit(cs, A6XX_HLSQ_GS_CNTL_CONSTLEN(align(gs->constlen, 4))); -} - -static void -tu6_emit_fs_config(struct tu_cs *cs, const struct ir3_shader_variant *fs) -{ - uint32_t sp_fs_ctrl = - A6XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | 0x1000000 | - A6XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fs->info.max_reg + 1) | - A6XX_SP_FS_CTRL_REG0_MERGEDREGS | - A6XX_SP_FS_CTRL_REG0_BRANCHSTACK(fs->branchstack); - if (fs->total_in > 0 || fs->frag_coord) - sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_VARYING; - if (fs->num_samp > 0) - sp_fs_ctrl |= A6XX_SP_FS_CTRL_REG0_PIXLODENABLE; - - uint32_t sp_fs_config = A6XX_SP_FS_CONFIG_NTEX(fs->num_samp) | - A6XX_SP_FS_CONFIG_NSAMP(fs->num_samp); - if (fs->instrlen) - sp_fs_config |= A6XX_SP_FS_CONFIG_ENABLED; - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A99E, 1); - tu_cs_emit(cs, 0x7fc0); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A9A8, 1); - tu_cs_emit(cs, 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_AB00, 1); - tu_cs_emit(cs, 0x5); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_CTRL_REG0, 1); - tu_cs_emit(cs, sp_fs_ctrl); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_CONFIG, 2); - tu_cs_emit(cs, sp_fs_config); - tu_cs_emit(cs, fs->instrlen); - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL, 1); - tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(fs->constlen, 4)) | 0x100); -} - -static void -tu6_emit_vs_system_values(struct tu_cs *cs, - const struct ir3_shader_variant *vs) -{ - const uint32_t vertexid_regid = - ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); - const uint32_t instanceid_regid = - ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); - - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6); - tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) | - A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) | - 0xfcfc0000); - tu_cs_emit(cs, 
0x0000fcfc); /* VFD_CONTROL_2 */ - tu_cs_emit(cs, 0xfcfcfcfc); /* VFD_CONTROL_3 */ - tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */ - tu_cs_emit(cs, 0x0000fcfc); /* VFD_CONTROL_5 */ - tu_cs_emit(cs, 0x00000000); /* VFD_CONTROL_6 */ -} - -static void -tu6_emit_vpc(struct tu_cs *cs, - const struct ir3_shader_variant *vs, - const struct ir3_shader_variant *fs, - bool binning_pass) -{ - struct ir3_shader_linkage linkage = { 0 }; - ir3_link_shaders(&linkage, vs, fs); - - if (vs->shader->stream_output.num_outputs && !binning_pass) - tu_finishme("stream output"); - - BITSET_DECLARE(vpc_var_enables, 128) = { 0 }; - for (uint32_t i = 0; i < linkage.cnt; i++) { - const uint32_t comp_count = util_last_bit(linkage.var[i].compmask); - for (uint32_t j = 0; j < comp_count; j++) - BITSET_SET(vpc_var_enables, linkage.var[i].loc + j); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4); - tu_cs_emit(cs, ~vpc_var_enables[0]); - tu_cs_emit(cs, ~vpc_var_enables[1]); - tu_cs_emit(cs, ~vpc_var_enables[2]); - tu_cs_emit(cs, ~vpc_var_enables[3]); - - /* a6xx finds position/pointsize at the end */ - const uint32_t position_regid = - ir3_find_output_regid(vs, VARYING_SLOT_POS); - const uint32_t pointsize_regid = - ir3_find_output_regid(vs, VARYING_SLOT_PSIZ); - uint32_t pointsize_loc = 0xff; - if (position_regid != regid(63, 0)) - ir3_link_add(&linkage, position_regid, 0xf, linkage.max_loc); - if (pointsize_regid != regid(63, 0)) { - pointsize_loc = linkage.max_loc; - ir3_link_add(&linkage, pointsize_regid, 0x1, linkage.max_loc); - } - - /* map vs outputs to VPC */ - assert(linkage.cnt <= 32); - const uint32_t sp_vs_out_count = (linkage.cnt + 1) / 2; - const uint32_t sp_vs_vpc_dst_count = (linkage.cnt + 3) / 4; - uint32_t sp_vs_out[16]; - uint32_t sp_vs_vpc_dst[8]; - sp_vs_out[sp_vs_out_count - 1] = 0; - sp_vs_vpc_dst[sp_vs_vpc_dst_count - 1] = 0; - for (uint32_t i = 0; i < linkage.cnt; i++) { - ((uint16_t *) sp_vs_out)[i] = - A6XX_SP_VS_OUT_REG_A_REGID(linkage.var[i].regid) | - A6XX_SP_VS_OUT_REG_A_COMPMASK(linkage.var[i].compmask); - ((uint8_t *) sp_vs_vpc_dst)[i] = - A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OUT_REG(0), sp_vs_out_count); - tu_cs_emit_array(cs, sp_vs_out, sp_vs_out_count); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vs_vpc_dst_count); - tu_cs_emit_array(cs, sp_vs_vpc_dst, sp_vs_vpc_dst_count); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1); - tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) | - (fs->total_in > 0 ? A6XX_VPC_CNTL_0_VARYING : 0) | - 0xff00ff00); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_PACK, 1); - tu_cs_emit(cs, A6XX_VPC_PACK_NUMNONPOSVAR(fs->total_in) | - A6XX_VPC_PACK_PSIZELOC(pointsize_loc) | - A6XX_VPC_PACK_STRIDE_IN_VPC(linkage.max_loc)); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_SIV_CNTL, 1); - tu_cs_emit(cs, 0x0000ffff); /* XXX */ - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_PRIMITIVE_CNTL, 1); - tu_cs_emit(cs, A6XX_SP_PRIMITIVE_CNTL_VSOUT(linkage.cnt)); - - tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_1, 1); - tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(linkage.max_loc) | - (vs->writes_psize ? 
A6XX_PC_PRIMITIVE_CNTL_1_PSIZE : 0)); -} - -static int -tu6_vpc_varying_mode(const struct ir3_shader_variant *fs, - uint32_t index, - uint8_t *interp_mode, - uint8_t *ps_repl_mode) -{ - enum - { - INTERP_SMOOTH = 0, - INTERP_FLAT = 1, - INTERP_ZERO = 2, - INTERP_ONE = 3, - }; - enum - { - PS_REPL_NONE = 0, - PS_REPL_S = 1, - PS_REPL_T = 2, - PS_REPL_ONE_MINUS_T = 3, - }; - - const uint32_t compmask = fs->inputs[index].compmask; - - /* NOTE: varyings are packed, so if compmask is 0xb then first, second, and - * fourth component occupy three consecutive varying slots - */ - int shift = 0; - *interp_mode = 0; - *ps_repl_mode = 0; - if (fs->inputs[index].slot == VARYING_SLOT_PNTC) { - if (compmask & 0x1) { - *ps_repl_mode |= PS_REPL_S << shift; - shift += 2; - } - if (compmask & 0x2) { - *ps_repl_mode |= PS_REPL_T << shift; - shift += 2; - } - if (compmask & 0x4) { - *interp_mode |= INTERP_ZERO << shift; - shift += 2; - } - if (compmask & 0x8) { - *interp_mode |= INTERP_ONE << 6; - shift += 2; - } - } else if ((fs->inputs[index].interpolate == INTERP_MODE_FLAT) || - fs->inputs[index].rasterflat) { - for (int i = 0; i < 4; i++) { - if (compmask & (1 << i)) { - *interp_mode |= INTERP_FLAT << shift; - shift += 2; - } - } - } - - return shift; -} - -static void -tu6_emit_vpc_varying_modes(struct tu_cs *cs, - const struct ir3_shader_variant *fs, - bool binning_pass) -{ - uint32_t interp_modes[8] = { 0 }; - uint32_t ps_repl_modes[8] = { 0 }; - - if (!binning_pass) { - for (int i = -1; - (i = ir3_next_varying(fs, i)) < (int) fs->inputs_count;) { - - /* get the mode for input i */ - uint8_t interp_mode; - uint8_t ps_repl_mode; - const int bits = - tu6_vpc_varying_mode(fs, i, &interp_mode, &ps_repl_mode); - - /* OR the mode into the array */ - const uint32_t inloc = fs->inputs[i].inloc * 2; - uint32_t n = inloc / 32; - uint32_t shift = inloc % 32; - interp_modes[n] |= interp_mode << shift; - ps_repl_modes[n] |= ps_repl_mode << shift; - if (shift + bits > 32) { - n++; - shift = 32 - shift; - - interp_modes[n] |= interp_mode >> shift; - ps_repl_modes[n] |= ps_repl_mode >> shift; - } - } - } - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8); - tu_cs_emit_array(cs, interp_modes, 8); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8); - tu_cs_emit_array(cs, ps_repl_modes, 8); -} - -static void -tu6_emit_fs_system_values(struct tu_cs *cs, - const struct ir3_shader_variant *fs) -{ - const uint32_t frontfacing_regid = - ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE); - const uint32_t sampleid_regid = - ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID); - const uint32_t samplemaskin_regid = - ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN); - const uint32_t fragcoord_xy_regid = - ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD); - const uint32_t fragcoord_zw_regid = (fragcoord_xy_regid != regid(63, 0)) - ? 
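
The packing rule described in tu6_vpc_varying_mode() above (a sparse compmask still yields consecutive varying slots) can be shown with a tiny helper; names and the FLAT/SMOOTH constants are illustrative:

#include <stdint.h>

#define DEMO_INTERP_SMOOTH 0
#define DEMO_INTERP_FLAT   1

/* Hedged sketch, not from the deleted file. */
static uint8_t
pack_flat_modes(uint8_t compmask)
{
   uint8_t mode = 0;
   int shift = 0;
   for (int c = 0; c < 4; c++) {
      if (!(compmask & (1u << c)))
         continue;                       /* unused component takes no slot */
      mode |= DEMO_INTERP_FLAT << shift;
      shift += 2;                        /* next *present* component       */
   }
   return mode;
}

/* compmask 0xb (x, y, w present): three packed 2-bit fields, 6 bits used;
 * the missing z component does not reserve a slot.
 */
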
(fragcoord_xy_regid + 2) - : fragcoord_xy_regid; - const uint32_t varyingcoord_regid = - ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PIXEL); - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5); - tu_cs_emit(cs, 0x7); - tu_cs_emit(cs, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(frontfacing_regid) | - A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(sampleid_regid) | - A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(samplemaskin_regid) | - A6XX_HLSQ_CONTROL_2_REG_SIZE(regid(63, 0))); - tu_cs_emit(cs, - A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(varyingcoord_regid) | - A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(regid(63, 0)) | - 0xfc00fc00); - tu_cs_emit(cs, - A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(fragcoord_xy_regid) | - A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(fragcoord_zw_regid) | - A6XX_HLSQ_CONTROL_4_REG_BARY_IJ_PIXEL_PERSAMP(regid(63, 0)) | - 0x0000fc00); - tu_cs_emit(cs, 0xfc); -} - -static void -tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UNKNOWN_B980, 1); - tu_cs_emit(cs, fs->total_in > 0 ? 3 : 1); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A982, 1); - tu_cs_emit(cs, 0); /* XXX */ - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1); - tu_cs_emit(cs, 0xff); /* XXX */ - - uint32_t gras_cntl = 0; - if (fs->total_in > 0) - gras_cntl |= A6XX_GRAS_CNTL_VARYING; - if (fs->frag_coord) { - gras_cntl |= A6XX_GRAS_CNTL_SIZE | A6XX_GRAS_CNTL_XCOORD | - A6XX_GRAS_CNTL_YCOORD | A6XX_GRAS_CNTL_ZCOORD | - A6XX_GRAS_CNTL_WCOORD; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CNTL, 1); - tu_cs_emit(cs, gras_cntl); - - uint32_t rb_render_control = 0; - if (fs->total_in > 0) { - rb_render_control = - A6XX_RB_RENDER_CONTROL0_VARYING | A6XX_RB_RENDER_CONTROL0_UNK10; - } - if (fs->frag_coord) { - rb_render_control |= - A6XX_RB_RENDER_CONTROL0_SIZE | A6XX_RB_RENDER_CONTROL0_XCOORD | - A6XX_RB_RENDER_CONTROL0_YCOORD | A6XX_RB_RENDER_CONTROL0_ZCOORD | - A6XX_RB_RENDER_CONTROL0_WCOORD; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2); - tu_cs_emit(cs, rb_render_control); - tu_cs_emit(cs, (fs->frag_face ? A6XX_RB_RENDER_CONTROL1_FACENESS : 0)); -} - -static void -tu6_emit_fs_outputs(struct tu_cs *cs, - const struct ir3_shader_variant *fs, - uint32_t mrt_count) -{ - const uint32_t fragdepth_regid = - ir3_find_output_regid(fs, FRAG_RESULT_DEPTH); - uint32_t fragdata_regid[8]; - if (fs->color0_mrt) { - fragdata_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_COLOR); - for (uint32_t i = 1; i < ARRAY_SIZE(fragdata_regid); i++) - fragdata_regid[i] = fragdata_regid[0]; - } else { - for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) - fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); - tu_cs_emit( - cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(fragdepth_regid) | 0xfcfc0000); - tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count)); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8); - for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) { - // TODO we could have a mix of half and full precision outputs, - // we really need to figure out half-precision from IR3_REG_HALF - tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) | - (false ? A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION : 0)); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); - tu_cs_emit(cs, fs->writes_pos ? 
A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z : 0); - tu_cs_emit(cs, A6XX_RB_FS_OUTPUT_CNTL1_MRT(mrt_count)); - - uint32_t gras_su_depth_plane_cntl = 0; - uint32_t rb_depth_plane_cntl = 0; - if (fs->no_earlyz | fs->writes_pos) { - gras_su_depth_plane_cntl |= A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z; - rb_depth_plane_cntl |= A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); - tu_cs_emit(cs, gras_su_depth_plane_cntl); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_PLANE_CNTL, 1); - tu_cs_emit(cs, rb_depth_plane_cntl); -} - -static void -tu6_emit_shader_object(struct tu_cs *cs, - gl_shader_stage stage, - const struct ir3_shader_variant *variant, - const struct tu_bo *binary_bo, - uint32_t binary_offset) -{ - uint16_t reg; - uint8_t opcode; - enum a6xx_state_block sb; - switch (stage) { - case MESA_SHADER_VERTEX: - reg = REG_A6XX_SP_VS_OBJ_START_LO; - opcode = CP_LOAD_STATE6_GEOM; - sb = SB6_VS_SHADER; - break; - case MESA_SHADER_TESS_CTRL: - reg = REG_A6XX_SP_HS_OBJ_START_LO; - opcode = CP_LOAD_STATE6_GEOM; - sb = SB6_HS_SHADER; - break; - case MESA_SHADER_TESS_EVAL: - reg = REG_A6XX_SP_DS_OBJ_START_LO; - opcode = CP_LOAD_STATE6_GEOM; - sb = SB6_DS_SHADER; - break; - case MESA_SHADER_GEOMETRY: - reg = REG_A6XX_SP_GS_OBJ_START_LO; - opcode = CP_LOAD_STATE6_GEOM; - sb = SB6_GS_SHADER; - break; - case MESA_SHADER_FRAGMENT: - reg = REG_A6XX_SP_FS_OBJ_START_LO; - opcode = CP_LOAD_STATE6_FRAG; - sb = SB6_FS_SHADER; - break; - case MESA_SHADER_COMPUTE: - reg = REG_A6XX_SP_CS_OBJ_START_LO; - opcode = CP_LOAD_STATE6_FRAG; - sb = SB6_CS_SHADER; - break; - default: - unreachable("invalid gl_shader_stage"); - opcode = CP_LOAD_STATE6_GEOM; - sb = SB6_VS_SHADER; - break; - } - - if (!variant->instrlen) { - tu_cs_emit_pkt4(cs, reg, 2); - tu_cs_emit_qw(cs, 0); - return; - } - - assert(variant->type == stage); - - const uint64_t binary_iova = binary_bo->iova + binary_offset; - assert((binary_iova & 0x3) == 0); - - tu_cs_emit_pkt4(cs, reg, 2); - tu_cs_emit_qw(cs, binary_iova); - - /* always indirect */ - const bool indirect = true; - if (indirect) { - tu_cs_emit_pkt7(cs, opcode, 3); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(variant->instrlen)); - tu_cs_emit_qw(cs, binary_iova); - } else { - const void *binary = binary_bo->map + binary_offset; - - tu_cs_emit_pkt7(cs, opcode, 3 + variant->info.sizedwords); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(sb) | - CP_LOAD_STATE6_0_NUM_UNIT(variant->instrlen)); - tu_cs_emit_qw(cs, 0); - tu_cs_emit_array(cs, binary, variant->info.sizedwords); - } -} - -static void -tu6_emit_program(struct tu_cs *cs, - const struct tu_pipeline_builder *builder, - const struct tu_bo *binary_bo, - bool binning_pass) -{ - static const struct ir3_shader_variant dummy_variant = { - .type = MESA_SHADER_NONE - }; - assert(builder->shaders[MESA_SHADER_VERTEX]); - const struct ir3_shader_variant *vs = - &builder->shaders[MESA_SHADER_VERTEX]->variants[0]; - const struct ir3_shader_variant *hs = - builder->shaders[MESA_SHADER_TESS_CTRL] - ? &builder->shaders[MESA_SHADER_TESS_CTRL]->variants[0] - : &dummy_variant; - const struct ir3_shader_variant *ds = - builder->shaders[MESA_SHADER_TESS_EVAL] - ? 
&builder->shaders[MESA_SHADER_TESS_EVAL]->variants[0] - : &dummy_variant; - const struct ir3_shader_variant *gs = - builder->shaders[MESA_SHADER_GEOMETRY] - ? &builder->shaders[MESA_SHADER_GEOMETRY]->variants[0] - : &dummy_variant; - const struct ir3_shader_variant *fs = - builder->shaders[MESA_SHADER_FRAGMENT] - ? &builder->shaders[MESA_SHADER_FRAGMENT]->variants[0] - : &dummy_variant; - - if (binning_pass) { - vs = &builder->shaders[MESA_SHADER_VERTEX]->variants[1]; - fs = &dummy_variant; - } - - tu6_emit_vs_config(cs, vs); - tu6_emit_hs_config(cs, hs); - tu6_emit_ds_config(cs, ds); - tu6_emit_gs_config(cs, gs); - tu6_emit_fs_config(cs, fs); - - tu6_emit_vs_system_values(cs, vs); - tu6_emit_vpc(cs, vs, fs, binning_pass); - tu6_emit_vpc_varying_modes(cs, fs, binning_pass); - tu6_emit_fs_system_values(cs, fs); - tu6_emit_fs_inputs(cs, fs); - tu6_emit_fs_outputs(cs, fs, builder->color_attachment_count); - - tu6_emit_shader_object(cs, MESA_SHADER_VERTEX, vs, binary_bo, - builder->shader_offsets[MESA_SHADER_VERTEX]); - - tu6_emit_shader_object(cs, MESA_SHADER_FRAGMENT, fs, binary_bo, - builder->shader_offsets[MESA_SHADER_FRAGMENT]); -} - -static void -tu6_emit_vertex_input(struct tu_cs *cs, - const struct ir3_shader_variant *vs, - const VkPipelineVertexInputStateCreateInfo *vi_info, - uint8_t bindings[MAX_VERTEX_ATTRIBS], - uint16_t strides[MAX_VERTEX_ATTRIBS], - uint16_t offsets[MAX_VERTEX_ATTRIBS], - uint32_t *count) -{ - uint32_t vfd_decode_idx = 0; - - /* why do we go beyond inputs_count? */ - assert(vs->inputs_count + 1 <= MAX_VERTEX_ATTRIBS); - for (uint32_t i = 0; i <= vs->inputs_count; i++) { - if (vs->inputs[i].sysval || !vs->inputs[i].compmask) - continue; - - const VkVertexInputAttributeDescription *vi_attr = - tu_find_vertex_input_attribute(vi_info, vs->inputs[i].slot); - const VkVertexInputBindingDescription *vi_binding = - tu_find_vertex_input_binding(vi_info, vi_attr); - assert(vi_attr && vi_binding); - - const struct tu_native_format *format = - tu6_get_native_format(vi_attr->format); - assert(format && format->vtx >= 0); - - uint32_t vfd_decode = A6XX_VFD_DECODE_INSTR_IDX(vfd_decode_idx) | - A6XX_VFD_DECODE_INSTR_FORMAT(format->vtx) | - A6XX_VFD_DECODE_INSTR_SWAP(format->swap) | - A6XX_VFD_DECODE_INSTR_UNK30; - if (vi_binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) - vfd_decode |= A6XX_VFD_DECODE_INSTR_INSTANCED; - if (!vk_format_is_int(vi_attr->format)) - vfd_decode |= A6XX_VFD_DECODE_INSTR_FLOAT; - - const uint32_t vfd_decode_step_rate = 1; - - const uint32_t vfd_dest_cntl = - A6XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vs->inputs[i].compmask) | - A6XX_VFD_DEST_CNTL_INSTR_REGID(vs->inputs[i].regid); - - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DECODE(vfd_decode_idx), 2); - tu_cs_emit(cs, vfd_decode); - tu_cs_emit(cs, vfd_decode_step_rate); - - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL(vfd_decode_idx), 1); - tu_cs_emit(cs, vfd_dest_cntl); - - bindings[vfd_decode_idx] = vi_binding->binding; - strides[vfd_decode_idx] = vi_binding->stride; - offsets[vfd_decode_idx] = vi_attr->offset; - - vfd_decode_idx++; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 1); - tu_cs_emit( - cs, A6XX_VFD_CONTROL_0_VTXCNT(vfd_decode_idx) | (vfd_decode_idx << 8)); - - *count = vfd_decode_idx; -} - -static uint32_t -tu6_guardband_adj(uint32_t v) -{ - if (v > 256) - return (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0)); - else - return 511; -} - -void -tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport) -{ - float offsets[3]; - float scales[3]; - scales[0] = viewport->width / 2.0f; - scales[1] = 
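
Sample outputs of the guardband adjustment above, which shrinks the guardband as the viewport grows (standalone sketch, not from the deleted file):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
guardband_adj(uint32_t v)
{
   return v > 256 ? (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0)) : 511;
}

int main(void)
{
   /* width/height -> register value: 256 -> 511, 512 -> 446,
    * 1024 -> 381, 2048 -> 316, 4096 -> 251
    */
   for (uint32_t v = 256; v <= 4096; v *= 2)
      printf("%u -> %u\n", v, guardband_adj(v));
   return 0;
}
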
viewport->height / 2.0f; - scales[2] = viewport->maxDepth - viewport->minDepth; - offsets[0] = viewport->x + scales[0]; - offsets[1] = viewport->y + scales[1]; - offsets[2] = viewport->minDepth; - - VkOffset2D min; - VkOffset2D max; - min.x = (int32_t) viewport->x; - max.x = (int32_t) ceilf(viewport->x + viewport->width); - if (viewport->height >= 0.0f) { - min.y = (int32_t) viewport->y; - max.y = (int32_t) ceilf(viewport->y + viewport->height); - } else { - min.y = (int32_t)(viewport->y + viewport->height); - max.y = (int32_t) ceilf(viewport->y); - } - /* the spec allows viewport->height to be 0.0f */ - if (min.y == max.y) - max.y++; - assert(min.x >= 0 && min.x < max.x); - assert(min.y >= 0 && min.y < max.y); - - VkExtent2D guardband_adj; - guardband_adj.width = tu6_guardband_adj(max.x - min.x); - guardband_adj.height = tu6_guardband_adj(max.y - min.y); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6); - tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0])); - tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0])); - tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1])); - tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1])); - tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2])); - tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2])); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); - tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min.y)); - tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max.x - 1) | - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max.y - 1)); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); - tu_cs_emit(cs, - A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband_adj.width) | - A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband_adj.height)); -} - -void -tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor) -{ - const VkOffset2D min = scissor->offset; - const VkOffset2D max = { - scissor->offset.x + scissor->extent.width, - scissor->offset.y + scissor->extent.height, - }; - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2); - tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) | - A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y)); - tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) | - A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1)); -} - -static void -tu6_emit_gras_unknowns(struct tu_cs *cs) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8000, 1); - tu_cs_emit(cs, 0x80); - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8001, 1); - tu_cs_emit(cs, 0x0); - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_UNKNOWN_8004, 1); - tu_cs_emit(cs, 0x0); -} - -static void -tu6_emit_point_size(struct tu_cs *cs) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2); - tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) | - A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f)); - tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f)); -} - -static uint32_t -tu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, - VkSampleCountFlagBits samples) -{ - uint32_t gras_su_cntl = 0; - - if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT; - if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK; - - if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW; - - /* don't set A6XX_GRAS_SU_CNTL_LINEHALFWIDTH */ - - if (rast_info->depthBiasEnable) - gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET; - - if (samples > VK_SAMPLE_COUNT_1_BIT) - gras_su_cntl |= 
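
The viewport code above converts a Vulkan viewport into the A6XX scale/offset pair (half the extent per axis, recentred on the viewport midpoint, with Z spanning minDepth..maxDepth) plus an inclusive per-viewport scissor box. A standalone sketch of that transform, using a local struct as a stand-in for VkViewport so it builds without the Vulkan headers (build with -lm):

#include <math.h>
#include <stdio.h>

/* Local stand-in for VkViewport, so the sketch needs no Vulkan headers. */
struct viewport {
   float x, y, width, height, minDepth, maxDepth;
};

int
main(void)
{
   struct viewport vp = { 0.0f, 0.0f, 1920.0f, 1080.0f, 0.0f, 1.0f };

   /* Scale is half the extent (depth range for Z); offset recenters on the
    * viewport midpoint (minDepth for Z), as in the removed tu6_emit_viewport(). */
   float xscale = vp.width / 2.0f, yscale = vp.height / 2.0f;
   float zscale = vp.maxDepth - vp.minDepth;
   float xoff = vp.x + xscale, yoff = vp.y + yscale, zoff = vp.minDepth;

   /* The per-viewport scissor is the rounded-out box, stored inclusively. */
   int min_x = (int)vp.x, max_x = (int)ceilf(vp.x + vp.width);
   int min_y = (int)vp.y, max_y = (int)ceilf(vp.y + vp.height);

   printf("scale  %.1f %.1f %.1f\n", xscale, yscale, zscale);
   printf("offset %.1f %.1f %.1f\n", xoff, yoff, zoff);
   printf("scissor (%d,%d)..(%d,%d)\n", min_x, min_y, max_x - 1, max_y - 1);
   return 0;
}

A negative viewport height (the flipped-Y convention) is handled in the removed code by swapping which end of the box becomes min.y and max.y; the sketch keeps only the positive-height case.
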
A6XX_GRAS_SU_CNTL_MSAA_ENABLE; - - return gras_su_cntl; -} - -void -tu6_emit_gras_su_cntl(struct tu_cs *cs, - uint32_t gras_su_cntl, - float line_width) -{ - assert((gras_su_cntl & A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK) == 0); - gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(line_width / 2.0f); - - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_CNTL, 1); - tu_cs_emit(cs, gras_su_cntl); -} - -void -tu6_emit_depth_bias(struct tu_cs *cs, - float constant_factor, - float clamp, - float slope_factor) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3); - tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor)); - tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor)); - tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp)); -} - -static void -tu6_emit_alpha_control_disable(struct tu_cs *cs) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_ALPHA_CONTROL, 1); - tu_cs_emit(cs, 0); -} - -static void -tu6_emit_depth_control(struct tu_cs *cs, - const VkPipelineDepthStencilStateCreateInfo *ds_info) -{ - assert(!ds_info->depthBoundsTestEnable); - - uint32_t rb_depth_cntl = 0; - if (ds_info->depthTestEnable) { - rb_depth_cntl |= - A6XX_RB_DEPTH_CNTL_Z_ENABLE | - A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) | - A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; - - if (ds_info->depthWriteEnable) - rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1); - tu_cs_emit(cs, rb_depth_cntl); -} - -static void -tu6_emit_stencil_control(struct tu_cs *cs, - const VkPipelineDepthStencilStateCreateInfo *ds_info) -{ - uint32_t rb_stencil_control = 0; - if (ds_info->stencilTestEnable) { - const VkStencilOpState *front = &ds_info->front; - const VkStencilOpState *back = &ds_info->back; - rb_stencil_control |= - A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | - A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | - A6XX_RB_STENCIL_CONTROL_STENCIL_READ | - A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) | - A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) | - A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) | - A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) | - A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) | - A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) | - A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) | - A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp)); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCIL_CONTROL, 1); - tu_cs_emit(cs, rb_stencil_control); -} - -void -tu6_emit_stencil_compare_mask(struct tu_cs *cs, uint32_t front, uint32_t back) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILMASK, 1); - tu_cs_emit( - cs, A6XX_RB_STENCILMASK_MASK(front) | A6XX_RB_STENCILMASK_BFMASK(back)); -} - -void -tu6_emit_stencil_write_mask(struct tu_cs *cs, uint32_t front, uint32_t back) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILWRMASK, 1); - tu_cs_emit(cs, A6XX_RB_STENCILWRMASK_WRMASK(front) | - A6XX_RB_STENCILWRMASK_BFWRMASK(back)); -} - -void -tu6_emit_stencil_reference(struct tu_cs *cs, uint32_t front, uint32_t back) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_STENCILREF, 1); - tu_cs_emit(cs, - A6XX_RB_STENCILREF_REF(front) | A6XX_RB_STENCILREF_BFREF(back)); -} - -static uint32_t -tu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att, - bool has_alpha) -{ - const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp); - const enum adreno_rb_blend_factor src_color_factor = tu6_blend_factor( - has_alpha ? 
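
tu6_emit_depth_control() and tu6_emit_stencil_control() above translate VkPipelineDepthStencilStateCreateInfo into RB_DEPTH_CNTL / RB_STENCIL_CONTROL; note the removed code asserts that depth-bounds testing is disabled. An application-side sketch of the kind of state it expects (standard Vulkan API, not part of the driver):

#include <vulkan/vulkan.h>

/* Depth test + write with LESS_OR_EQUAL, stencil and depth-bounds off. */
static VkPipelineDepthStencilStateCreateInfo
example_depth_stencil_state(void)
{
   VkPipelineDepthStencilStateCreateInfo ds = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = VK_TRUE,
      .depthWriteEnable = VK_TRUE,
      .depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL,
      .depthBoundsTestEnable = VK_FALSE, /* asserted off by the removed code */
      .stencilTestEnable = VK_FALSE,     /* front/back ops ignored when off */
   };
   return ds;
}
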
att->srcColorBlendFactor - : tu_blend_factor_no_dst_alpha(att->srcColorBlendFactor)); - const enum adreno_rb_blend_factor dst_color_factor = tu6_blend_factor( - has_alpha ? att->dstColorBlendFactor - : tu_blend_factor_no_dst_alpha(att->dstColorBlendFactor)); - const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp); - const enum adreno_rb_blend_factor src_alpha_factor = - tu6_blend_factor(att->srcAlphaBlendFactor); - const enum adreno_rb_blend_factor dst_alpha_factor = - tu6_blend_factor(att->dstAlphaBlendFactor); - - return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) | - A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) | - A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) | - A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) | - A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) | - A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor); -} - -static uint32_t -tu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att, - uint32_t rb_mrt_control_rop, - bool is_int, - bool has_alpha) -{ - uint32_t rb_mrt_control = - A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask); - - /* ignore blending and logic op for integer attachments */ - if (is_int) { - rb_mrt_control |= A6XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); - return rb_mrt_control; - } - - rb_mrt_control |= rb_mrt_control_rop; - - if (att->blendEnable) { - rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND; - - if (has_alpha) - rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND2; - } - - return rb_mrt_control; -} - -static void -tu6_emit_rb_mrt_controls(struct tu_cs *cs, - const VkPipelineColorBlendStateCreateInfo *blend_info, - const VkFormat attachment_formats[MAX_RTS], - uint32_t *blend_enable_mask) -{ - *blend_enable_mask = 0; - - bool rop_reads_dst = false; - uint32_t rb_mrt_control_rop = 0; - if (blend_info->logicOpEnable) { - rop_reads_dst = tu_logic_op_reads_dst(blend_info->logicOp); - rb_mrt_control_rop = - A6XX_RB_MRT_CONTROL_ROP_ENABLE | - A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(blend_info->logicOp)); - } - - for (uint32_t i = 0; i < blend_info->attachmentCount; i++) { - const VkPipelineColorBlendAttachmentState *att = - &blend_info->pAttachments[i]; - const VkFormat format = attachment_formats[i]; - - uint32_t rb_mrt_control = 0; - uint32_t rb_mrt_blend_control = 0; - if (format != VK_FORMAT_UNDEFINED) { - const bool is_int = vk_format_is_int(format); - const bool has_alpha = vk_format_has_alpha(format); - - rb_mrt_control = - tu6_rb_mrt_control(att, rb_mrt_control_rop, is_int, has_alpha); - rb_mrt_blend_control = tu6_rb_mrt_blend_control(att, has_alpha); - - if (att->blendEnable || rop_reads_dst) - *blend_enable_mask |= 1 << i; - } - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2); - tu_cs_emit(cs, rb_mrt_control); - tu_cs_emit(cs, rb_mrt_blend_control); - } - - for (uint32_t i = blend_info->attachmentCount; i < MAX_RTS; i++) { - tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_CONTROL(i), 2); - tu_cs_emit(cs, 0); - tu_cs_emit(cs, 0); - } -} - -static void -tu6_emit_blend_control(struct tu_cs *cs, - uint32_t blend_enable_mask, - const VkPipelineMultisampleStateCreateInfo *msaa_info) -{ - assert(!msaa_info->sampleShadingEnable); - assert(!msaa_info->alphaToOneEnable); - - uint32_t sp_blend_cntl = A6XX_SP_BLEND_CNTL_UNK8; - if (blend_enable_mask) - sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ENABLED; - if (msaa_info->alphaToCoverageEnable) - sp_blend_cntl |= A6XX_SP_BLEND_CNTL_ALPHA_TO_COVERAGE; - - const uint32_t sample_mask = - msaa_info->pSampleMask ? 
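
tu6_rb_mrt_blend_control() around here builds the per-MRT blend factors; when the attachment format has no alpha channel the color factors are remapped through tu_blend_factor_no_dst_alpha(), presumably because destination alpha behaves as 1.0 on such formats (that helper is not shown in this diff, so this reading is an assumption). An application-side sketch of a typical blend attachment state it consumes:

#include <vulkan/vulkan.h>

/* Classic "source over" alpha blending for one color attachment. */
static VkPipelineColorBlendAttachmentState
example_blend_attachment(void)
{
   VkPipelineColorBlendAttachmentState att = {
      .blendEnable = VK_TRUE,
      .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA,
      .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
      .colorBlendOp = VK_BLEND_OP_ADD,
      .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
      .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
      .alphaBlendOp = VK_BLEND_OP_ADD,
      .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
                        VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
   };
   return att;
}
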
*msaa_info->pSampleMask - : ((1 << msaa_info->rasterizationSamples) - 1); - - /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */ - uint32_t rb_blend_cntl = - A6XX_RB_BLEND_CNTL_ENABLE_BLEND(blend_enable_mask) | - A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND | - A6XX_RB_BLEND_CNTL_SAMPLE_MASK(sample_mask); - if (msaa_info->alphaToCoverageEnable) - rb_blend_cntl |= A6XX_RB_BLEND_CNTL_ALPHA_TO_COVERAGE; - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_BLEND_CNTL, 1); - tu_cs_emit(cs, sp_blend_cntl); - - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_CNTL, 1); - tu_cs_emit(cs, rb_blend_cntl); -} - -void -tu6_emit_blend_constants(struct tu_cs *cs, const float constants[4]) -{ - tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLEND_RED_F32, 4); - tu_cs_emit_array(cs, (const uint32_t *) constants, 4); -} - -static VkResult -tu_pipeline_builder_create_pipeline(struct tu_pipeline_builder *builder, - struct tu_pipeline **out_pipeline) -{ - struct tu_device *dev = builder->device; - - struct tu_pipeline *pipeline = - vk_zalloc2(&dev->alloc, builder->alloc, sizeof(*pipeline), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (!pipeline) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - tu_cs_init(&pipeline->cs, TU_CS_MODE_SUB_STREAM, 2048); - - /* reserve the space now such that tu_cs_begin_sub_stream never fails */ - VkResult result = tu_cs_reserve_space(dev, &pipeline->cs, 2048); - if (result != VK_SUCCESS) { - vk_free2(&dev->alloc, builder->alloc, pipeline); - return result; - } - - *out_pipeline = pipeline; - - return VK_SUCCESS; -} - -static VkResult -tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder) -{ - const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = { - NULL - }; - for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { - gl_shader_stage stage = - tu_shader_stage(builder->create_info->pStages[i].stage); - stage_infos[stage] = &builder->create_info->pStages[i]; - } - - struct tu_shader_compile_options options; - tu_shader_compile_options_init(&options, builder->create_info); - - /* compile shaders in reverse order */ - struct tu_shader *next_stage_shader = NULL; - for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; - stage > MESA_SHADER_NONE; stage--) { - const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; - if (!stage_info) - continue; - - struct tu_shader *shader = - tu_shader_create(builder->device, stage, stage_info, builder->alloc); - if (!shader) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - VkResult result = - tu_shader_compile(builder->device, shader, next_stage_shader, - &options, builder->alloc); - if (result != VK_SUCCESS) - return result; - - builder->shaders[stage] = shader; - builder->shader_offsets[stage] = builder->shader_total_size; - builder->shader_total_size += - sizeof(uint32_t) * shader->variants[0].info.sizedwords; - - next_stage_shader = shader; - } - - if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { - const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; - builder->binning_vs_offset = builder->shader_total_size; - builder->shader_total_size += - sizeof(uint32_t) * vs->variants[1].info.sizedwords; - } - - return VK_SUCCESS; -} - -static VkResult -tu_pipeline_builder_upload_shaders(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - struct tu_bo *bo = &pipeline->program.binary_bo; - - VkResult result = - tu_bo_init_new(builder->device, bo, builder->shader_total_size); - if (result != VK_SUCCESS) - return result; - - result = tu_bo_map(builder->device, bo); - if (result != VK_SUCCESS) - return 
result; - - for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { - const struct tu_shader *shader = builder->shaders[i]; - if (!shader) - continue; - - memcpy(bo->map + builder->shader_offsets[i], shader->binary, - sizeof(uint32_t) * shader->variants[0].info.sizedwords); - } - - if (builder->shaders[MESA_SHADER_VERTEX]->has_binning_pass) { - const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; - memcpy(bo->map + builder->binning_vs_offset, vs->binning_binary, - sizeof(uint32_t) * vs->variants[1].info.sizedwords); - } - - return VK_SUCCESS; -} - -static void -tu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - const VkPipelineDynamicStateCreateInfo *dynamic_info = - builder->create_info->pDynamicState; - - if (!dynamic_info) - return; - - for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { - pipeline->dynamic_state.mask |= - tu_dynamic_state_bit(dynamic_info->pDynamicStates[i]); - } -} - -static void -tu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - struct tu_cs prog_cs; - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 512, &prog_cs); - tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, false); - pipeline->program.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs); - - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 512, &prog_cs); - tu6_emit_program(&prog_cs, builder, &pipeline->program.binary_bo, true); - pipeline->program.binning_state_ib = - tu_cs_end_sub_stream(&pipeline->cs, &prog_cs); -} - -static void -tu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - const VkPipelineVertexInputStateCreateInfo *vi_info = - builder->create_info->pVertexInputState; - const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX]; - - struct tu_cs vi_cs; - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, - MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs); - tu6_emit_vertex_input(&vi_cs, &vs->variants[0], vi_info, - pipeline->vi.bindings, pipeline->vi.strides, - pipeline->vi.offsets, &pipeline->vi.count); - pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); - - if (vs->has_binning_pass) { - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, - MAX_VERTEX_ATTRIBS * 5 + 2, &vi_cs); - tu6_emit_vertex_input( - &vi_cs, &vs->variants[1], vi_info, pipeline->vi.binning_bindings, - pipeline->vi.binning_strides, pipeline->vi.binning_offsets, - &pipeline->vi.binning_count); - pipeline->vi.binning_state_ib = - tu_cs_end_sub_stream(&pipeline->cs, &vi_cs); - } -} - -static void -tu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - const VkPipelineInputAssemblyStateCreateInfo *ia_info = - builder->create_info->pInputAssemblyState; - - pipeline->ia.primtype = tu6_primtype(ia_info->topology); - pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable; -} - -static void -tu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - /* The spec says: - * - * pViewportState is a pointer to an instance of the - * VkPipelineViewportStateCreateInfo structure, and is ignored if the - * pipeline has rasterization disabled." - * - * We leave the relevant registers stale in that case. 
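
tu_pipeline_builder_parse_dynamic() above folds pDynamicStates into a TU_DYNAMIC_* bitmask, and the later parse_* helpers skip baking the corresponding registers into the pipeline whenever a state is marked dynamic. An application-side sketch of requesting dynamic viewport and scissor (standard Vulkan API):

#include <vulkan/vulkan.h>

static const VkDynamicState example_dynamic_states[] = {
   VK_DYNAMIC_STATE_VIEWPORT,
   VK_DYNAMIC_STATE_SCISSOR,
};

/* With these set, the values come from vkCmdSetViewport()/vkCmdSetScissor()
 * at record time instead of the pipeline's baked state. */
static VkPipelineDynamicStateCreateInfo
example_dynamic_state(void)
{
   VkPipelineDynamicStateCreateInfo dyn = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
      .dynamicStateCount = 2,
      .pDynamicStates = example_dynamic_states,
   };
   return dyn;
}
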
- */ - if (builder->rasterizer_discard) - return; - - const VkPipelineViewportStateCreateInfo *vp_info = - builder->create_info->pViewportState; - - struct tu_cs vp_cs; - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 15, &vp_cs); - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_VIEWPORT)) { - assert(vp_info->viewportCount == 1); - tu6_emit_viewport(&vp_cs, vp_info->pViewports); - } - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SCISSOR)) { - assert(vp_info->scissorCount == 1); - tu6_emit_scissor(&vp_cs, vp_info->pScissors); - } - - pipeline->vp.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vp_cs); -} - -static void -tu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - const VkPipelineRasterizationStateCreateInfo *rast_info = - builder->create_info->pRasterizationState; - - assert(!rast_info->depthClampEnable); - assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL); - - struct tu_cs rast_cs; - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 20, &rast_cs); - - /* move to hw ctx init? */ - tu6_emit_gras_unknowns(&rast_cs); - tu6_emit_point_size(&rast_cs); - - const uint32_t gras_su_cntl = - tu6_gras_su_cntl(rast_info, builder->samples); - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_LINE_WIDTH)) - tu6_emit_gras_su_cntl(&rast_cs, gras_su_cntl, rast_info->lineWidth); - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_DEPTH_BIAS)) { - tu6_emit_depth_bias(&rast_cs, rast_info->depthBiasConstantFactor, - rast_info->depthBiasClamp, - rast_info->depthBiasSlopeFactor); - } - - pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &rast_cs); - - pipeline->rast.gras_su_cntl = gras_su_cntl; -} - -static void -tu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, - struct tu_pipeline *pipeline) -{ - /* The spec says: - * - * pDepthStencilState is a pointer to an instance of the - * VkPipelineDepthStencilStateCreateInfo structure, and is ignored if - * the pipeline has rasterization disabled or if the subpass of the - * render pass the pipeline is created against does not use a - * depth/stencil attachment. - * - * We disable both depth and stencil tests in those cases. - */ - static const VkPipelineDepthStencilStateCreateInfo dummy_ds_info; - const VkPipelineDepthStencilStateCreateInfo *ds_info = - builder->use_depth_stencil_attachment - ? builder->create_info->pDepthStencilState - : &dummy_ds_info; - - struct tu_cs ds_cs; - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, 12, &ds_cs); - - /* move to hw ctx init? 
*/ - tu6_emit_alpha_control_disable(&ds_cs); - - tu6_emit_depth_control(&ds_cs, ds_info); - tu6_emit_stencil_control(&ds_cs, ds_info); - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_COMPARE_MASK)) { - tu6_emit_stencil_compare_mask(&ds_cs, ds_info->front.compareMask, - ds_info->back.compareMask); - } - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_WRITE_MASK)) { - tu6_emit_stencil_write_mask(&ds_cs, ds_info->front.writeMask, - ds_info->back.writeMask); - } - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_STENCIL_REFERENCE)) { - tu6_emit_stencil_reference(&ds_cs, ds_info->front.reference, - ds_info->back.reference); - } - - pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &ds_cs); -} - -static void -tu_pipeline_builder_parse_multisample_and_color_blend( - struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) -{ - /* The spec says: - * - * pMultisampleState is a pointer to an instance of the - * VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline - * has rasterization disabled. - * - * Also, - * - * pColorBlendState is a pointer to an instance of the - * VkPipelineColorBlendStateCreateInfo structure, and is ignored if the - * pipeline has rasterization disabled or if the subpass of the render - * pass the pipeline is created against does not use any color - * attachments. - * - * We leave the relevant registers stale when rasterization is disabled. - */ - if (builder->rasterizer_discard) - return; - - static const VkPipelineColorBlendStateCreateInfo dummy_blend_info; - const VkPipelineMultisampleStateCreateInfo *msaa_info = - builder->create_info->pMultisampleState; - const VkPipelineColorBlendStateCreateInfo *blend_info = - builder->use_color_attachments ? builder->create_info->pColorBlendState - : &dummy_blend_info; - - struct tu_cs blend_cs; - tu_cs_begin_sub_stream(builder->device, &pipeline->cs, MAX_RTS * 3 + 9, - &blend_cs); - - uint32_t blend_enable_mask; - tu6_emit_rb_mrt_controls(&blend_cs, blend_info, - builder->color_attachment_formats, - &blend_enable_mask); - - if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS)) - tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants); - - tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info); - - pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs); -} - -static void -tu_pipeline_finish(struct tu_pipeline *pipeline, - struct tu_device *dev, - const VkAllocationCallbacks *alloc) -{ - tu_cs_finish(dev, &pipeline->cs); - - if (pipeline->program.binary_bo.gem_handle) - tu_bo_finish(dev, &pipeline->program.binary_bo); -} - -static VkResult -tu_pipeline_builder_build(struct tu_pipeline_builder *builder, - struct tu_pipeline **pipeline) -{ - VkResult result = tu_pipeline_builder_create_pipeline(builder, pipeline); - if (result != VK_SUCCESS) - return result; - - /* compile and upload shaders */ - result = tu_pipeline_builder_compile_shaders(builder); - if (result == VK_SUCCESS) - result = tu_pipeline_builder_upload_shaders(builder, *pipeline); - if (result != VK_SUCCESS) { - tu_pipeline_finish(*pipeline, builder->device, builder->alloc); - vk_free2(&builder->device->alloc, builder->alloc, *pipeline); - *pipeline = VK_NULL_HANDLE; - - return result; - } - - tu_pipeline_builder_parse_dynamic(builder, *pipeline); - tu_pipeline_builder_parse_shader_stages(builder, *pipeline); - tu_pipeline_builder_parse_vertex_input(builder, *pipeline); - tu_pipeline_builder_parse_input_assembly(builder, *pipeline); - 
tu_pipeline_builder_parse_viewport(builder, *pipeline); - tu_pipeline_builder_parse_rasterization(builder, *pipeline); - tu_pipeline_builder_parse_depth_stencil(builder, *pipeline); - tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline); - - /* we should have reserved enough space upfront such that the CS never - * grows - */ - assert((*pipeline)->cs.bo_count == 1); - - return VK_SUCCESS; -} - -static void -tu_pipeline_builder_finish(struct tu_pipeline_builder *builder) -{ - for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) { - if (!builder->shaders[i]) - continue; - tu_shader_destroy(builder->device, builder->shaders[i], builder->alloc); - } -} - -static void -tu_pipeline_builder_init_graphics( - struct tu_pipeline_builder *builder, - struct tu_device *dev, - struct tu_pipeline_cache *cache, - const VkGraphicsPipelineCreateInfo *create_info, - const VkAllocationCallbacks *alloc) -{ - *builder = (struct tu_pipeline_builder) { - .device = dev, - .cache = cache, - .create_info = create_info, - .alloc = alloc, - }; - - builder->rasterizer_discard = - create_info->pRasterizationState->rasterizerDiscardEnable; - - if (builder->rasterizer_discard) { - builder->samples = VK_SAMPLE_COUNT_1_BIT; - } else { - builder->samples = create_info->pMultisampleState->rasterizationSamples; - - const struct tu_render_pass *pass = - tu_render_pass_from_handle(create_info->renderPass); - const struct tu_subpass *subpass = - &pass->subpasses[create_info->subpass]; - - builder->use_depth_stencil_attachment = - subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED; - - assert(subpass->color_count == - create_info->pColorBlendState->attachmentCount); - builder->color_attachment_count = subpass->color_count; - for (uint32_t i = 0; i < subpass->color_count; i++) { - const uint32_t a = subpass->color_attachments[i].attachment; - if (a == VK_ATTACHMENT_UNUSED) - continue; - - builder->color_attachment_formats[i] = pass->attachments[a].format; - builder->use_color_attachments = true; - } - } -} - -VkResult -tu_CreateGraphicsPipelines(VkDevice device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkGraphicsPipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipelines) -{ - TU_FROM_HANDLE(tu_device, dev, device); - TU_FROM_HANDLE(tu_pipeline_cache, cache, pipelineCache); - - for (uint32_t i = 0; i < count; i++) { - struct tu_pipeline_builder builder; - tu_pipeline_builder_init_graphics(&builder, dev, cache, - &pCreateInfos[i], pAllocator); - - struct tu_pipeline *pipeline; - VkResult result = tu_pipeline_builder_build(&builder, &pipeline); - tu_pipeline_builder_finish(&builder); - - if (result != VK_SUCCESS) { - for (uint32_t j = 0; j < i; j++) { - tu_DestroyPipeline(device, pPipelines[j], pAllocator); - pPipelines[j] = VK_NULL_HANDLE; - } - - return result; - } - - pPipelines[i] = tu_pipeline_to_handle(pipeline); - } - - return VK_SUCCESS; -} - -static VkResult -tu_compute_pipeline_create(VkDevice _device, - VkPipelineCache _cache, - const VkComputePipelineCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipeline) -{ - return VK_SUCCESS; -} - -VkResult -tu_CreateComputePipelines(VkDevice _device, - VkPipelineCache pipelineCache, - uint32_t count, - const VkComputePipelineCreateInfo *pCreateInfos, - const VkAllocationCallbacks *pAllocator, - VkPipeline *pPipelines) -{ - VkResult result = VK_SUCCESS; - - unsigned i = 0; - for (; i < count; i++) { - VkResult r; - r = tu_compute_pipeline_create(_device, 
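
tu_CreateGraphicsPipelines() above creates the requested pipelines one by one and, on the first failure, destroys the ones it already produced and clears their handles before returning. A minimal caller-side sketch under those semantics (the create-info setup is assumed to be done elsewhere):

#include <vulkan/vulkan.h>
#include <stddef.h>

/* On error every handle this implementation touched is already
 * VK_NULL_HANDLE, so the caller only propagates the result. */
static VkResult
create_pipelines(VkDevice device, uint32_t count,
                 const VkGraphicsPipelineCreateInfo *create_infos,
                 VkPipeline *pipelines)
{
   return vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, count,
                                    create_infos, NULL, pipelines);
}
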
pipelineCache, &pCreateInfos[i], - pAllocator, &pPipelines[i]); - if (r != VK_SUCCESS) { - result = r; - pPipelines[i] = VK_NULL_HANDLE; - } - } - - return result; -} - -void -tu_DestroyPipeline(VkDevice _device, - VkPipeline _pipeline, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, dev, _device); - TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline); - - if (!_pipeline) - return; - - tu_pipeline_finish(pipeline, dev, pAllocator); - vk_free2(&dev->alloc, pAllocator, pipeline); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_query.c b/lib/mesa/src/freedreno/vulkan/tu_query.c deleted file mode 100644 index 2cb710fb1..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_query.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright 2016 Red Hat Inc. - * Based on anv: - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "tu_private.h" - -#include <assert.h> -#include <fcntl.h> -#include <stdbool.h> -#include <string.h> -#include <unistd.h> - -#include "nir/nir_builder.h" - -VkResult -tu_CreateQueryPool(VkDevice _device, - const VkQueryPoolCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkQueryPool *pQueryPool) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_query_pool *pool = - vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (!pool) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - *pQueryPool = tu_query_pool_to_handle(pool); - return VK_SUCCESS; -} - -void -tu_DestroyQueryPool(VkDevice _device, - VkQueryPool _pool, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_query_pool, pool, _pool); - - if (!pool) - return; - - vk_free2(&device->alloc, pAllocator, pool); -} - -VkResult -tu_GetQueryPoolResults(VkDevice _device, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - size_t dataSize, - void *pData, - VkDeviceSize stride, - VkQueryResultFlags flags) -{ - return VK_SUCCESS; -} - -void -tu_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - VkDeviceSize stride, - VkQueryResultFlags flags) -{ -} - -void -tu_CmdResetQueryPool(VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t firstQuery, - uint32_t queryCount) -{ -} - -void -tu_CmdBeginQuery(VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query, - VkQueryControlFlags flags) -{ -} - -void -tu_CmdEndQuery(VkCommandBuffer commandBuffer, - VkQueryPool queryPool, - uint32_t query) -{ -} - -void -tu_CmdWriteTimestamp(VkCommandBuffer commandBuffer, - VkPipelineStageFlagBits pipelineStage, - VkQueryPool queryPool, - uint32_t query) -{ -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_shader.c b/lib/mesa/src/freedreno/vulkan/tu_shader.c deleted file mode 100644 index f6e13d7c4..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_shader.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright © 2019 Google LLC - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "tu_private.h" - -#include "spirv/nir_spirv.h" -#include "util/mesa-sha1.h" - -#include "ir3/ir3_nir.h" - -static nir_shader * -tu_spirv_to_nir(struct ir3_compiler *compiler, - const uint32_t *words, - size_t word_count, - gl_shader_stage stage, - const char *entry_point_name, - const VkSpecializationInfo *spec_info) -{ - /* TODO these are made-up */ - const struct spirv_to_nir_options spirv_options = { - .lower_ubo_ssbo_access_to_offsets = true, - .caps = { false }, - }; - const nir_shader_compiler_options *nir_options = - ir3_get_compiler_options(compiler); - - /* convert VkSpecializationInfo */ - struct nir_spirv_specialization *spec = NULL; - uint32_t num_spec = 0; - if (spec_info && spec_info->mapEntryCount) { - spec = malloc(sizeof(*spec) * spec_info->mapEntryCount); - if (!spec) - return NULL; - - for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) { - const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i]; - const void *data = spec_info->pData + entry->offset; - assert(data + entry->size <= spec_info->pData + spec_info->dataSize); - spec[i].id = entry->constantID; - if (entry->size == 8) - spec[i].data64 = *(const uint64_t *) data; - else - spec[i].data32 = *(const uint32_t *) data; - spec[i].defined_on_module = false; - } - - num_spec = spec_info->mapEntryCount; - } - - nir_shader *nir = - spirv_to_nir(words, word_count, spec, num_spec, stage, entry_point_name, - &spirv_options, nir_options); - - free(spec); - - assert(nir->info.stage == stage); - nir_validate_shader(nir, "after spirv_to_nir"); - - return nir; -} - -static void -tu_sort_variables_by_location(struct exec_list *variables) -{ - struct exec_list sorted; - exec_list_make_empty(&sorted); - - nir_foreach_variable_safe(var, variables) - { - exec_node_remove(&var->node); - - /* insert the variable into the sorted list */ - nir_variable *next = NULL; - nir_foreach_variable(tmp, &sorted) - { - if (var->data.location < tmp->data.location) { - next = tmp; - break; - } - } - if (next) - exec_node_insert_node_before(&next->node, &var->node); - else - exec_list_push_tail(&sorted, &var->node); - } - - exec_list_move_nodes_to(&sorted, variables); -} - -struct tu_shader * -tu_shader_create(struct tu_device *dev, - gl_shader_stage stage, - const VkPipelineShaderStageCreateInfo *stage_info, - const VkAllocationCallbacks *alloc) -{ - const struct tu_shader_module *module = - tu_shader_module_from_handle(stage_info->module); - struct tu_shader *shader; - - const uint32_t max_variant_count = (stage == MESA_SHADER_VERTEX) ? 2 : 1; - shader = vk_zalloc2( - &dev->alloc, alloc, - sizeof(*shader) + sizeof(struct ir3_shader_variant) * max_variant_count, - 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); - if (!shader) - return NULL; - - /* translate SPIR-V to NIR */ - assert(module->code_size % 4 == 0); - nir_shader *nir = tu_spirv_to_nir( - dev->compiler, (const uint32_t *) module->code, module->code_size / 4, - stage, stage_info->pName, stage_info->pSpecializationInfo); - if (!nir) { - vk_free2(&dev->alloc, alloc, shader); - return NULL; - } - - if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) { - fprintf(stderr, "translated nir:\n"); - nir_print_shader(nir, stderr); - } - - /* TODO what needs to happen? 
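
tu_spirv_to_nir() above copies VkSpecializationInfo entries into nir_spirv_specialization, using data64 for 8-byte entries and data32 otherwise. An application-side sketch of how that structure is typically filled, here with a single 4-byte constant (the constant ID and value are made up for illustration):

#include <vulkan/vulkan.h>
#include <stdint.h>

static const uint32_t spec_value = 64; /* e.g. a workgroup size */

static const VkSpecializationMapEntry spec_entry = {
   .constantID = 0,
   .offset = 0,
   .size = sizeof(uint32_t),
};

/* Passed through VkPipelineShaderStageCreateInfo::pSpecializationInfo. */
static const VkSpecializationInfo spec_info = {
   .mapEntryCount = 1,
   .pMapEntries = &spec_entry,
   .dataSize = sizeof(spec_value),
   .pData = &spec_value,
};
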
*/ - - switch (stage) { - case MESA_SHADER_VERTEX: - tu_sort_variables_by_location(&nir->outputs); - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - tu_sort_variables_by_location(&nir->inputs); - tu_sort_variables_by_location(&nir->outputs); - break; - case MESA_SHADER_FRAGMENT: - tu_sort_variables_by_location(&nir->inputs); - break; - case MESA_SHADER_COMPUTE: - break; - default: - unreachable("invalid gl_shader_stage"); - break; - } - - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, - ir3_glsl_type_size); - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, - ir3_glsl_type_size); - nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, - ir3_glsl_type_size); - - NIR_PASS_V(nir, nir_lower_system_values); - NIR_PASS_V(nir, nir_lower_frexp); - NIR_PASS_V(nir, nir_lower_io, nir_var_all, ir3_glsl_type_size, 0); - - nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - shader->ir3_shader.compiler = dev->compiler; - shader->ir3_shader.type = stage; - shader->ir3_shader.nir = nir; - - return shader; -} - -void -tu_shader_destroy(struct tu_device *dev, - struct tu_shader *shader, - const VkAllocationCallbacks *alloc) -{ - if (shader->ir3_shader.nir) - ralloc_free(shader->ir3_shader.nir); - - for (uint32_t i = 0; i < 1 + shader->has_binning_pass; i++) { - if (shader->variants[i].ir) - ir3_destroy(shader->variants[i].ir); - } - - if (shader->ir3_shader.const_state.immediates) - free(shader->ir3_shader.const_state.immediates); - if (shader->binary) - free(shader->binary); - if (shader->binning_binary) - free(shader->binning_binary); - - vk_free2(&dev->alloc, alloc, shader); -} - -void -tu_shader_compile_options_init( - struct tu_shader_compile_options *options, - const VkGraphicsPipelineCreateInfo *pipeline_info) -{ - *options = (struct tu_shader_compile_options) { - /* TODO ir3_key */ - - .optimize = !(pipeline_info->flags & - VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT), - .include_binning_pass = true, - }; -} - -static uint32_t * -tu_compile_shader_variant(struct ir3_shader *shader, - const struct ir3_shader_key *key, - bool binning_pass, - struct ir3_shader_variant *variant) -{ - variant->shader = shader; - variant->type = shader->type; - variant->key = *key; - variant->binning_pass = binning_pass; - - int ret = ir3_compile_shader_nir(shader->compiler, variant); - if (ret) - return NULL; - - /* when assemble fails, we rely on tu_shader_destroy to clean up the - * variant - */ - return ir3_shader_assemble(variant, shader->compiler->gpu_id); -} - -VkResult -tu_shader_compile(struct tu_device *dev, - struct tu_shader *shader, - const struct tu_shader *next_stage, - const struct tu_shader_compile_options *options, - const VkAllocationCallbacks *alloc) -{ - if (options->optimize) { - /* ignore the key for the first pass of optimization */ - ir3_optimize_nir(&shader->ir3_shader, shader->ir3_shader.nir, NULL); - - if (unlikely(dev->physical_device->instance->debug_flags & - TU_DEBUG_NIR)) { - fprintf(stderr, "optimized nir:\n"); - nir_print_shader(shader->ir3_shader.nir, stderr); - } - } - - shader->binary = tu_compile_shader_variant( - &shader->ir3_shader, &options->key, false, &shader->variants[0]); - if (!shader->binary) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - /* compile another variant for the binning pass */ - if (options->include_binning_pass && - shader->ir3_shader.type == MESA_SHADER_VERTEX) { - shader->binning_binary = tu_compile_shader_variant( - &shader->ir3_shader, &options->key, true, 
&shader->variants[1]); - if (!shader->binning_binary) - return VK_ERROR_OUT_OF_HOST_MEMORY; - - shader->has_binning_pass = true; - } - - if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_IR3)) { - fprintf(stderr, "disassembled ir3:\n"); - fprintf(stderr, "shader: %s\n", - gl_shader_stage_name(shader->ir3_shader.type)); - ir3_shader_disasm(&shader->variants[0], shader->binary, stderr); - - if (shader->has_binning_pass) { - fprintf(stderr, "disassembled ir3:\n"); - fprintf(stderr, "shader: %s (binning)\n", - gl_shader_stage_name(shader->ir3_shader.type)); - ir3_shader_disasm(&shader->variants[1], shader->binning_binary, - stderr); - } - } - - return VK_SUCCESS; -} - -VkResult -tu_CreateShaderModule(VkDevice _device, - const VkShaderModuleCreateInfo *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkShaderModule *pShaderModule) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_shader_module *module; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); - assert(pCreateInfo->flags == 0); - assert(pCreateInfo->codeSize % 4 == 0); - - module = vk_alloc2(&device->alloc, pAllocator, - sizeof(*module) + pCreateInfo->codeSize, 8, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (module == NULL) - return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - module->code_size = pCreateInfo->codeSize; - memcpy(module->code, pCreateInfo->pCode, pCreateInfo->codeSize); - - _mesa_sha1_compute(module->code, module->code_size, module->sha1); - - *pShaderModule = tu_shader_module_to_handle(module); - - return VK_SUCCESS; -} - -void -tu_DestroyShaderModule(VkDevice _device, - VkShaderModule _module, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - TU_FROM_HANDLE(tu_shader_module, module, _module); - - if (!module) - return; - - vk_free2(&device->alloc, pAllocator, module); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_suballoc.c b/lib/mesa/src/freedreno/vulkan/tu_suballoc.c deleted file mode 100644 index d61edf66a..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_suballoc.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright © 2022 Google LLC - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * Suballocator for space within BOs. - * - * BOs are allocated at PAGE_SIZE (typically 4k) granularity, so small - * allocations are a waste to have in their own BO. 
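
The tu_CreateShaderModule() removed a little further up copies codeSize bytes of SPIR-V behind the module handle and hashes them with SHA-1; codeSize is asserted to be a multiple of four. An application-side sketch of creating such a module (standard Vulkan API):

#include <vulkan/vulkan.h>
#include <stddef.h>
#include <stdint.h>

/* codeSize is in bytes (not words) and must be a multiple of 4. */
static VkResult
create_module(VkDevice device, const uint32_t *spirv, size_t size_bytes,
              VkShaderModule *module)
{
   const VkShaderModuleCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
      .codeSize = size_bytes,
      .pCode = spirv,
   };
   return vkCreateShaderModule(device, &info, NULL, module);
}
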
Moreover, on DRM we track a - * list of all BOs currently allocated and submit the whole list for validation - * (busy tracking and implicit sync) on every submit, and that validation is a - * non-trivial cost. So, being able to pack multiple allocations into a BO can - * be a significant performance win. - * - * The allocator tracks a current BO it is linearly allocating from, and up to - * one extra BO returned to the pool when all of its previous suballocations - * have been freed. This means that fragmentation can be an issue for - * default_size > PAGE_SIZE and small allocations. Also, excessive BO - * reallocation may happen for workloads where default size < working set size. - */ - -#include "tu_private.h" - -/* Initializes a BO sub-allocator using refcounts on BOs. - */ -void -tu_bo_suballocator_init(struct tu_suballocator *suballoc, - struct tu_device *dev, - uint32_t default_size, uint32_t flags) -{ - suballoc->dev = dev; - suballoc->default_size = default_size; - suballoc->flags = flags; - suballoc->bo = NULL; - suballoc->cached_bo = NULL; -} - -void -tu_bo_suballocator_finish(struct tu_suballocator *suballoc) -{ - if (suballoc->bo) - tu_bo_finish(suballoc->dev, suballoc->bo); - if (suballoc->cached_bo) - tu_bo_finish(suballoc->dev, suballoc->cached_bo); -} - -VkResult -tu_suballoc_bo_alloc(struct tu_suballoc_bo *suballoc_bo, - struct tu_suballocator *suballoc, - uint32_t size, uint32_t align) -{ - struct tu_bo *bo = suballoc->bo; - if (bo) { - uint32_t offset = ALIGN(suballoc->next_offset, align); - if (offset + size <= bo->size) { - suballoc_bo->bo = tu_bo_get_ref(bo); - suballoc_bo->iova = bo->iova + offset; - suballoc_bo->size = size; - - suballoc->next_offset = offset + size; - return VK_SUCCESS; - } else { - tu_bo_finish(suballoc->dev, bo); - suballoc->bo = NULL; - } - } - - uint32_t alloc_size = MAX2(size, suballoc->default_size); - - /* Reuse a recycled suballoc BO if we have one and it's big enough, otherwise free it. */ - if (suballoc->cached_bo) { - if (alloc_size <= suballoc->cached_bo->size) - suballoc->bo = suballoc->cached_bo; - else - tu_bo_finish(suballoc->dev, suballoc->cached_bo); - suballoc->cached_bo = NULL; - } - - /* Allocate the new BO if we didn't have one cached. */ - if (!suballoc->bo) { - VkResult result = tu_bo_init_new(suballoc->dev, &suballoc->bo, - alloc_size, - suballoc->flags); - if (result != VK_SUCCESS) - return result; - } - - VkResult result = tu_bo_map(suballoc->dev, suballoc->bo); - if (result != VK_SUCCESS) { - tu_bo_finish(suballoc->dev, suballoc->bo); - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - suballoc_bo->bo = tu_bo_get_ref(suballoc->bo); - suballoc_bo->iova = suballoc_bo->bo->iova; - suballoc_bo->size = size; - suballoc->next_offset = size; - - return VK_SUCCESS; -} - -void -tu_suballoc_bo_free(struct tu_suballocator *suballoc, struct tu_suballoc_bo *bo) -{ - if (!bo->bo) - return; - - /* If we held the last reference to this BO, just move it to the - * suballocator for the next time we need to allocate. - */ - if (p_atomic_read(&bo->bo->refcnt) == 1 && !suballoc->cached_bo) { - suballoc->cached_bo = bo->bo; - return; - } - - /* Otherwise, drop the refcount on it normally. 
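
The allocation path above bumps next_offset through the current BO after aligning it, and falls back to a new (or cached) BO when the request no longer fits. A self-contained toy model of just that bump step, with plain integers standing in for BOs and a local power-of-two ALIGN macro (reference counting and BO reuse are left out):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_POT(v, a) (((v) + (a) - 1) & ~((a) - 1)) /* a must be a power of two */

struct toy_suballoc {
   uint32_t bo_size;     /* size of the current "BO" */
   uint32_t next_offset; /* linear allocation cursor */
};

/* Returns the suballocation offset, or -1 when a new BO would be needed. */
static int64_t
toy_alloc(struct toy_suballoc *s, uint32_t size, uint32_t align)
{
   uint32_t offset = ALIGN_POT(s->next_offset, align);
   if (offset + size > s->bo_size)
      return -1;
   s->next_offset = offset + size;
   return offset;
}

int
main(void)
{
   struct toy_suballoc s = { .bo_size = 4096, .next_offset = 0 };
   printf("%lld\n", (long long)toy_alloc(&s, 100, 64));  /* 0   */
   printf("%lld\n", (long long)toy_alloc(&s, 100, 64));  /* 128 */
   printf("%lld\n", (long long)toy_alloc(&s, 4000, 64)); /* -1  */
   return 0;
}
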
*/ - tu_bo_finish(suballoc->dev, bo->bo); -} - -void * -tu_suballoc_bo_map(struct tu_suballoc_bo *bo) -{ - return bo->bo->map + (bo->iova - bo->bo->iova); -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_util.c b/lib/mesa/src/freedreno/vulkan/tu_util.c deleted file mode 100644 index e630460fb..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_util.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "tu_private.h" - -#include <assert.h> -#include <errno.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "util/u_math.h" -#include "vk_enum_to_str.h" - -/* TODO: Add Android support to tu_log funcs */ - -/** Log an error message. */ -void tu_printflike(1, 2) tu_loge(const char *format, ...) -{ - va_list va; - - va_start(va, format); - tu_loge_v(format, va); - va_end(va); -} - -/** \see tu_loge() */ -void -tu_loge_v(const char *format, va_list va) -{ - fprintf(stderr, "vk: error: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); -} - -/** Log an error message. */ -void tu_printflike(1, 2) tu_logi(const char *format, ...) -{ - va_list va; - - va_start(va, format); - tu_logi_v(format, va); - va_end(va); -} - -/** \see tu_logi() */ -void -tu_logi_v(const char *format, va_list va) -{ - fprintf(stderr, "tu: info: "); - vfprintf(stderr, format, va); - fprintf(stderr, "\n"); -} - -void tu_printflike(3, 4) - __tu_finishme(const char *file, int line, const char *format, ...) -{ - va_list ap; - char buffer[256]; - - va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer); -} - -VkResult -__vk_errorf(struct tu_instance *instance, - VkResult error, - const char *file, - int line, - const char *format, - ...) 
-{ - va_list ap; - char buffer[256]; - - const char *error_str = vk_Result_to_str(error); - -#ifndef DEBUG - return error; -#endif - - if (format) { - va_start(ap, format); - vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - fprintf(stderr, "%s:%d: %s (%s)\n", file, line, buffer, error_str); - } else { - fprintf(stderr, "%s:%d: %s\n", file, line, error_str); - } - - return error; -} diff --git a/lib/mesa/src/freedreno/vulkan/tu_wsi.c b/lib/mesa/src/freedreno/vulkan/tu_wsi.c deleted file mode 100644 index 21466108b..000000000 --- a/lib/mesa/src/freedreno/vulkan/tu_wsi.c +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright © 2016 Red Hat - * based on intel anv code: - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "tu_private.h" - -#include "vk_util.h" -#include "wsi_common.h" - -static PFN_vkVoidFunction -tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) -{ - return tu_lookup_entrypoint_unchecked(pName); -} - -VkResult -tu_wsi_init(struct tu_physical_device *physical_device) -{ - return wsi_device_init(&physical_device->wsi_device, - tu_physical_device_to_handle(physical_device), - tu_wsi_proc_addr, &physical_device->instance->alloc, - physical_device->master_fd, NULL); -} - -void -tu_wsi_finish(struct tu_physical_device *physical_device) -{ - wsi_device_finish(&physical_device->wsi_device, - &physical_device->instance->alloc); -} - -void -tu_DestroySurfaceKHR(VkInstance _instance, - VkSurfaceKHR _surface, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_instance, instance, _instance); - ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface); - - vk_free2(&instance->alloc, pAllocator, surface); -} - -VkResult -tu_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, - uint32_t queueFamilyIndex, - VkSurfaceKHR surface, - VkBool32 *pSupported) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_surface_support( - &device->wsi_device, queueFamilyIndex, surface, pSupported); -} - -VkResult -tu_GetPhysicalDeviceSurfaceCapabilitiesKHR( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - VkSurfaceCapabilitiesKHR *pSurfaceCapabilities) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_surface_capabilities(&device->wsi_device, surface, - pSurfaceCapabilities); -} - -VkResult -tu_GetPhysicalDeviceSurfaceCapabilities2KHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, - VkSurfaceCapabilities2KHR *pSurfaceCapabilities) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_surface_capabilities2( - &device->wsi_device, pSurfaceInfo, pSurfaceCapabilities); -} - -VkResult -tu_GetPhysicalDeviceSurfaceCapabilities2EXT( - VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - VkSurfaceCapabilities2EXT *pSurfaceCapabilities) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_surface_capabilities2ext( - &device->wsi_device, surface, pSurfaceCapabilities); -} - -VkResult -tu_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - uint32_t *pSurfaceFormatCount, - VkSurfaceFormatKHR *pSurfaceFormats) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_surface_formats( - &device->wsi_device, surface, pSurfaceFormatCount, pSurfaceFormats); -} - -VkResult -tu_GetPhysicalDeviceSurfaceFormats2KHR( - VkPhysicalDevice physicalDevice, - const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo, - uint32_t *pSurfaceFormatCount, - VkSurfaceFormat2KHR *pSurfaceFormats) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo, - pSurfaceFormatCount, - pSurfaceFormats); -} - -VkResult -tu_GetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - uint32_t *pPresentModeCount, - VkPresentModeKHR *pPresentModes) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_surface_present_modes( - &device->wsi_device, surface, pPresentModeCount, pPresentModes); -} - -VkResult -tu_CreateSwapchainKHR(VkDevice _device, - const 
VkSwapchainCreateInfoKHR *pCreateInfo, - const VkAllocationCallbacks *pAllocator, - VkSwapchainKHR *pSwapchain) -{ - TU_FROM_HANDLE(tu_device, device, _device); - const VkAllocationCallbacks *alloc; - if (pAllocator) - alloc = pAllocator; - else - alloc = &device->alloc; - - return wsi_common_create_swapchain(&device->physical_device->wsi_device, - tu_device_to_handle(device), - pCreateInfo, alloc, pSwapchain); -} - -void -tu_DestroySwapchainKHR(VkDevice _device, - VkSwapchainKHR swapchain, - const VkAllocationCallbacks *pAllocator) -{ - TU_FROM_HANDLE(tu_device, device, _device); - const VkAllocationCallbacks *alloc; - - if (pAllocator) - alloc = pAllocator; - else - alloc = &device->alloc; - - wsi_common_destroy_swapchain(_device, swapchain, alloc); -} - -VkResult -tu_GetSwapchainImagesKHR(VkDevice device, - VkSwapchainKHR swapchain, - uint32_t *pSwapchainImageCount, - VkImage *pSwapchainImages) -{ - return wsi_common_get_images(swapchain, pSwapchainImageCount, - pSwapchainImages); -} - -VkResult -tu_AcquireNextImageKHR(VkDevice device, - VkSwapchainKHR swapchain, - uint64_t timeout, - VkSemaphore semaphore, - VkFence fence, - uint32_t *pImageIndex) -{ - VkAcquireNextImageInfoKHR acquire_info = { - .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, - .swapchain = swapchain, - .timeout = timeout, - .semaphore = semaphore, - .fence = fence, - .deviceMask = 0, - }; - - return tu_AcquireNextImage2KHR(device, &acquire_info, pImageIndex); -} - -VkResult -tu_AcquireNextImage2KHR(VkDevice _device, - const VkAcquireNextImageInfoKHR *pAcquireInfo, - uint32_t *pImageIndex) -{ - TU_FROM_HANDLE(tu_device, device, _device); - struct tu_physical_device *pdevice = device->physical_device; - - VkResult result = wsi_common_acquire_next_image2( - &pdevice->wsi_device, _device, pAcquireInfo, pImageIndex); - - /* TODO signal fence and semaphore */ - - return result; -} - -VkResult -tu_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo) -{ - TU_FROM_HANDLE(tu_queue, queue, _queue); - return wsi_common_queue_present( - &queue->device->physical_device->wsi_device, - tu_device_to_handle(queue->device), _queue, queue->queue_family_index, - pPresentInfo); -} - -VkResult -tu_GetDeviceGroupPresentCapabilitiesKHR( - VkDevice device, VkDeviceGroupPresentCapabilitiesKHR *pCapabilities) -{ - memset(pCapabilities->presentMask, 0, sizeof(pCapabilities->presentMask)); - pCapabilities->presentMask[0] = 0x1; - pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; - - return VK_SUCCESS; -} - -VkResult -tu_GetDeviceGroupSurfacePresentModesKHR( - VkDevice device, - VkSurfaceKHR surface, - VkDeviceGroupPresentModeFlagsKHR *pModes) -{ - *pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR; - - return VK_SUCCESS; -} - -VkResult -tu_GetPhysicalDevicePresentRectanglesKHR(VkPhysicalDevice physicalDevice, - VkSurfaceKHR surface, - uint32_t *pRectCount, - VkRect2D *pRects) -{ - TU_FROM_HANDLE(tu_physical_device, device, physicalDevice); - - return wsi_common_get_present_rectangles(&device->wsi_device, surface, - pRectCount, pRects); -} |
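
The WSI entry points above are thin wrappers over wsi_common; tu_AcquireNextImageKHR in particular repacks its arguments into VkAcquireNextImageInfoKHR and calls the 2KHR path. An application-side sketch of the matching acquire call (standard Vulkan API; the swapchain and semaphore are assumed to be created elsewhere):

#include <vulkan/vulkan.h>
#include <stdint.h>

/* Acquire with a semaphore, no fence, and an unbounded timeout. */
static VkResult
acquire_image(VkDevice device, VkSwapchainKHR swapchain,
              VkSemaphore acquire_sem, uint32_t *image_index)
{
   return vkAcquireNextImageKHR(device, swapchain, UINT64_MAX,
                                acquire_sem, VK_NULL_HANDLE, image_index);
}
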