Skip to content

Commit a08d483

Browse files
committed
Refactor and consolidate all SIMD handlers
1 parent 4b9ade9 commit a08d483

13 files changed

+275
-727
lines changed

src_c/alphablit.c

+110-111
Large diffs are not rendered by default.

src_c/simd_blitters.h

+5-24
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,9 @@
11
#define NO_PYGAME_C_API
22
#include "_surface.h"
33
#include "_blit_info.h"
4+
#include "simd_shared.h"
45

5-
#if !defined(PG_ENABLE_ARM_NEON) && defined(__aarch64__)
6-
// arm64 has neon optimisations enabled by default, even when fpu=neon is not
7-
// passed
8-
#define PG_ENABLE_ARM_NEON 1
9-
#endif
10-
11-
#if (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON))
6+
#ifdef PG_HAS_SSE2_OR_NEON
127
void
138
alphablit_alpha_sse2_argb_surf_alpha(SDL_BlitInfo *info);
149
void
@@ -37,26 +32,11 @@ void
3732
blit_blend_rgb_min_sse2(SDL_BlitInfo *info);
3833
void
3934
blit_blend_premultiplied_sse2(SDL_BlitInfo *info);
40-
#endif /* (defined(__SSE2__) || defined(PG_ENABLE_ARM_NEON)) */
41-
42-
/* Deliberately putting these outside of the preprocessor guards as I want to
43-
move to a system of trusting the runtime checks to head to the right
44-
function and having a fallback function there if pygame is not compiled
45-
with the right stuff (this is the strategy used for AVX2 right now).
46-
Potentially I might want to shift both these into a slightly different
47-
file as they are not exactly blits (though v. similar) - or I could rename
48-
the SIMD trilogy of files to replace the word blit with something more
49-
generic like surface_ops*/
50-
51-
void
52-
premul_surf_color_by_alpha_non_simd(SDL_Surface *src,
53-
PG_PixelFormat *src_format,
54-
SDL_Palette *src_palette, SDL_Surface *dst,
55-
PG_PixelFormat *dst_format,
56-
SDL_Palette *dst_palette);
5735
void
5836
premul_surf_color_by_alpha_sse2(SDL_Surface *src, SDL_Surface *dst);
37+
#endif /* PG_HAS_SSE2_OR_NEON */
5938

39+
#ifdef PG_HAS_AVX2
6040
void
6141
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info);
6242
void
@@ -87,3 +67,4 @@ void
8767
blit_blend_premultiplied_avx2(SDL_BlitInfo *info);
8868
void
8969
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst);
70+
#endif

src_c/simd_blitters_avx2.c

+4-191
Original file line number | Diff line number | Diff line change
@@ -1,44 +1,16 @@
11
#include "simd_blitters.h"
22

3-
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
4-
#include <immintrin.h>
5-
#endif /* defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H) */
6-
7-
#define BAD_AVX2_FUNCTION_CALL \
8-
printf( \
9-
"Fatal Error: Attempted calling an AVX2 function when both compile " \
10-
"time and runtime support is missing. If you are seeing this " \
11-
"message, you have stumbled across a pygame bug, please report it " \
12-
"to the devs!"); \
13-
PG_EXIT(1)
14-
15-
/* helper function that does a runtime check for AVX2. It has the added
16-
* functionality of also returning 0 if compile time support is missing */
17-
int
18-
pg_has_avx2()
19-
{
20-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
21-
!defined(SDL_DISABLE_IMMINTRIN_H)
22-
return SDL_HasAVX2();
23-
#else
24-
return 0;
25-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
26-
!defined(SDL_DISABLE_IMMINTRIN_H) */
27-
}
28-
293
/* This returns 1 when avx2 is available at runtime but support for it isn't
304
* compiled in, 0 in all other cases */
315
int
326
pg_avx2_at_runtime_but_uncompiled()
337
{
348
if (SDL_HasAVX2()) {
35-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
36-
!defined(SDL_DISABLE_IMMINTRIN_H)
9+
#ifdef PG_HAS_AVX2
3710
return 0;
3811
#else
3912
return 1;
40-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
41-
!defined(SDL_DISABLE_IMMINTRIN_H) */
13+
#endif /* PG_HAS_AVX2 */
4214
}
4315
return 0;
4416
}
@@ -190,8 +162,7 @@ pg_avx2_at_runtime_but_uncompiled()
190162
_mm256_srli_epi16( \
191163
_mm256_mulhi_epu16(MM256I, _mm256_set1_epi16((short)0x8081)), 7);
192164

193-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
194-
!defined(SDL_DISABLE_IMMINTRIN_H)
165+
#ifdef PG_HAS_AVX2
195166
void
196167
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
197168
{
@@ -258,17 +229,7 @@ alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
258229
* surfaces. */
259230
pixels_dst = _mm256_and_si256(pixels_dst, mask_out_alpha);)
260231
}
261-
#else
262-
void
263-
alphablit_alpha_avx2_argb_no_surf_alpha_opaque_dst(SDL_BlitInfo *info)
264-
{
265-
BAD_AVX2_FUNCTION_CALL;
266-
}
267-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
268-
!defined(SDL_DISABLE_IMMINTRIN_H) */
269232

270-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
271-
!defined(SDL_DISABLE_IMMINTRIN_H)
272233
void
273234
alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
274235
{
@@ -324,17 +285,7 @@ alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
324285
shuff_dst =
325286
_mm256_blendv_epi8(shuff_dst, new_dst_alpha, combine_rgba_mask);))
326287
}
327-
#else
328-
void
329-
alphablit_alpha_avx2_argb_no_surf_alpha(SDL_BlitInfo *info)
330-
{
331-
BAD_AVX2_FUNCTION_CALL;
332-
}
333-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
334-
!defined(SDL_DISABLE_IMMINTRIN_H) */
335288

336-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
337-
!defined(SDL_DISABLE_IMMINTRIN_H)
338289
void
339290
alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
340291
{
@@ -406,17 +357,6 @@ alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
406357
shuff_dst =
407358
_mm256_blendv_epi8(shuff_dst, new_dst_alpha, combine_rgba_mask);))
408359
}
409-
#else
410-
void
411-
alphablit_alpha_avx2_argb_surf_alpha(SDL_BlitInfo *info)
412-
{
413-
BAD_AVX2_FUNCTION_CALL;
414-
}
415-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
416-
!defined(SDL_DISABLE_IMMINTRIN_H) */
417-
418-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
419-
!defined(SDL_DISABLE_IMMINTRIN_H)
420360
void
421361
blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
422362
{
@@ -524,17 +464,6 @@ blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
524464
dstp = (Uint32 *)dstp256 + dstskip;
525465
}
526466
}
527-
#else
528-
void
529-
blit_blend_rgba_mul_avx2(SDL_BlitInfo *info)
530-
{
531-
BAD_AVX2_FUNCTION_CALL;
532-
}
533-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
534-
!defined(SDL_DISABLE_IMMINTRIN_H) */
535-
536-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
537-
!defined(SDL_DISABLE_IMMINTRIN_H)
538467
void
539468
blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
540469
{
@@ -653,17 +582,6 @@ blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
653582
dstp = (Uint32 *)dstp256 + dstskip;
654583
}
655584
}
656-
#else
657-
void
658-
blit_blend_rgb_mul_avx2(SDL_BlitInfo *info)
659-
{
660-
BAD_AVX2_FUNCTION_CALL;
661-
}
662-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
663-
!defined(SDL_DISABLE_IMMINTRIN_H) */
664-
665-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
666-
!defined(SDL_DISABLE_IMMINTRIN_H)
667585
void
668586
blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
669587
{
@@ -725,17 +643,6 @@ blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
725643
dstp = (Uint32 *)dstp256 + dstskip;
726644
}
727645
}
728-
#else
729-
void
730-
blit_blend_rgba_add_avx2(SDL_BlitInfo *info)
731-
{
732-
BAD_AVX2_FUNCTION_CALL;
733-
}
734-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
735-
!defined(SDL_DISABLE_IMMINTRIN_H) */
736-
737-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
738-
!defined(SDL_DISABLE_IMMINTRIN_H)
739646
void
740647
blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
741648
{
@@ -805,17 +712,6 @@ blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
805712
dstp = (Uint32 *)dstp256 + dstskip;
806713
}
807714
}
808-
#else
809-
void
810-
blit_blend_rgb_add_avx2(SDL_BlitInfo *info)
811-
{
812-
BAD_AVX2_FUNCTION_CALL;
813-
}
814-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
815-
!defined(SDL_DISABLE_IMMINTRIN_H) */
816-
817-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
818-
!defined(SDL_DISABLE_IMMINTRIN_H)
819715
void
820716
blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
821717
{
@@ -877,17 +773,6 @@ blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
877773
dstp = (Uint32 *)dstp256 + dstskip;
878774
}
879775
}
880-
#else
881-
void
882-
blit_blend_rgba_sub_avx2(SDL_BlitInfo *info)
883-
{
884-
BAD_AVX2_FUNCTION_CALL;
885-
}
886-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
887-
!defined(SDL_DISABLE_IMMINTRIN_H) */
888-
889-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
890-
!defined(SDL_DISABLE_IMMINTRIN_H)
891776
void
892777
blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
893778
{
@@ -957,17 +842,6 @@ blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
957842
dstp = (Uint32 *)dstp256 + dstskip;
958843
}
959844
}
960-
#else
961-
void
962-
blit_blend_rgb_sub_avx2(SDL_BlitInfo *info)
963-
{
964-
BAD_AVX2_FUNCTION_CALL;
965-
}
966-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
967-
!defined(SDL_DISABLE_IMMINTRIN_H) */
968-
969-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
970-
!defined(SDL_DISABLE_IMMINTRIN_H)
971845
void
972846
blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
973847
{
@@ -1029,17 +903,6 @@ blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
1029903
dstp = (Uint32 *)dstp256 + dstskip;
1030904
}
1031905
}
1032-
#else
1033-
void
1034-
blit_blend_rgba_max_avx2(SDL_BlitInfo *info)
1035-
{
1036-
BAD_AVX2_FUNCTION_CALL;
1037-
}
1038-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1039-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1040-
1041-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1042-
!defined(SDL_DISABLE_IMMINTRIN_H)
1043906
void
1044907
blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
1045908
{
@@ -1109,17 +972,6 @@ blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
1109972
dstp = (Uint32 *)dstp256 + dstskip;
1110973
}
1111974
}
1112-
#else
1113-
void
1114-
blit_blend_rgb_max_avx2(SDL_BlitInfo *info)
1115-
{
1116-
BAD_AVX2_FUNCTION_CALL;
1117-
}
1118-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1119-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1120-
1121-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1122-
!defined(SDL_DISABLE_IMMINTRIN_H)
1123975
void
1124976
blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
1125977
{
@@ -1181,17 +1033,6 @@ blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
11811033
dstp = (Uint32 *)dstp256 + dstskip;
11821034
}
11831035
}
1184-
#else
1185-
void
1186-
blit_blend_rgba_min_avx2(SDL_BlitInfo *info)
1187-
{
1188-
BAD_AVX2_FUNCTION_CALL;
1189-
}
1190-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1191-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1192-
1193-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1194-
!defined(SDL_DISABLE_IMMINTRIN_H)
11951036
void
11961037
blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
11971038
{
@@ -1261,17 +1102,6 @@ blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
12611102
dstp = (Uint32 *)dstp256 + dstskip;
12621103
}
12631104
}
1264-
#else
1265-
void
1266-
blit_blend_rgb_min_avx2(SDL_BlitInfo *info)
1267-
{
1268-
BAD_AVX2_FUNCTION_CALL;
1269-
}
1270-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1271-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1272-
1273-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1274-
!defined(SDL_DISABLE_IMMINTRIN_H)
12751105
void
12761106
blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
12771107
{
@@ -1521,14 +1351,6 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
15211351
dstp = (Uint32 *)dstp256 + dstskip;
15221352
}
15231353
}
1524-
#else
1525-
void
1526-
blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
1527-
{
1528-
BAD_AVX2_FUNCTION_CALL;
1529-
}
1530-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1531-
!defined(SDL_DISABLE_IMMINTRIN_H) */
15321354

15331355
#define PREMUL_ALPHA_CODE \
15341356
/* extract the alpha */ \
@@ -1558,8 +1380,6 @@ blit_blend_premultiplied_avx2(SDL_BlitInfo *info)
15581380
/*add the original alpha back in*/ \
15591381
mm_dst = _mm256_or_si256(mm_dst, mm_alpha_in);
15601382

1561-
#if defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1562-
!defined(SDL_DISABLE_IMMINTRIN_H)
15631383
void
15641384
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
15651385
{
@@ -1635,11 +1455,4 @@ premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
16351455
dstp += dst_skip;
16361456
}
16371457
}
1638-
#else
1639-
void
1640-
premul_surf_color_by_alpha_avx2(SDL_Surface *src, SDL_Surface *dst)
1641-
{
1642-
BAD_AVX2_FUNCTION_CALL;
1643-
}
1644-
#endif /* defined(__AVX2__) && defined(HAVE_IMMINTRIN_H) && \
1645-
!defined(SDL_DISABLE_IMMINTRIN_H) */
1458+
#endif /* PG_HAS_AVX2 */

0 commit comments

Comments
 (0)