Skip to content

Commit 0b713cd

Browse files
committed
small optimizations / tweaks
1 parent a1b2ecb commit 0b713cd

File tree

1 file changed

+56
-33
lines changed

1 file changed

+56
-33
lines changed

src/external/rlsw.h

+56-33
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ void swBindTexture(uint32_t id);
324324
#ifdef RLSW_IMPL
325325

326326
#include <stdlib.h>
327+
#include <stddef.h>
327328
#include <math.h>
328329

329330
/* === Defines and Macros === */
@@ -519,10 +520,10 @@ static inline float sw_lerp(float a, float b, float t)
519520
return a + t * (b - a);
520521
}
521522

522-
static inline sw_vertex_t sw_lerp_vertex(const sw_vertex_t* a, const sw_vertex_t* b, float t)
523+
static inline sw_vertex_t sw_lerp_vertex_PNTCH(const sw_vertex_t* a, const sw_vertex_t* b, float t)
523524
{
524525
sw_vertex_t result;
525-
for (int i = 0; i < sizeof(sw_vertex_t) / sizeof(float); i++) {
526+
for (int i = 0; i < offsetof(sw_vertex_t, screen) / sizeof(float); i++) {
526527
((float*)&result)[i] = sw_lerp(((float*)a)[i], ((float*)b)[i], t);
527528
}
528529
return result;
@@ -1031,7 +1032,7 @@ static inline bool sw_triangle_clip_w(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYGON
10311032
for (int i = 0; i < inputCounter; i++) {
10321033
char currDot = (input[i].homogeneous[3] < SW_CLIP_EPSILON) ? -1 : 1;
10331034
if (prevDot*currDot < 0) {
1034-
polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i],
1035+
polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i],
10351036
(SW_CLIP_EPSILON - prevVt->homogeneous[3]) / (input[i].homogeneous[3] - prevVt->homogeneous[3]));
10361037
}
10371038
if (currDot > 0) {
@@ -1070,7 +1071,7 @@ static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYG
10701071
for (int i = 0; i < inputCounter; i++) {
10711072
char currDot = (input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1;
10721073
if (prevDot * currDot <= 0) {
1073-
polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) /
1074+
polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) /
10741075
((prevVt->homogeneous[3] - prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] - input[i].homogeneous[iAxis])));
10751076
}
10761077
if (currDot > 0) {
@@ -1096,7 +1097,7 @@ static inline bool sw_triangle_clip_xyz(sw_vertex_t polygon[SW_MAX_CLIPPED_POLYG
10961097
for (int i = 0; i < inputCounter; i++) {
10971098
char currDot = (-input[i].homogeneous[iAxis] <= input[i].homogeneous[3]) ? 1 : -1;
10981099
if (prevDot*currDot <= 0) {
1099-
polygon[(*vertexCounter)++] = sw_lerp_vertex(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) /
1100+
polygon[(*vertexCounter)++] = sw_lerp_vertex_PNTCH(prevVt, &input[i], (prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) /
11001101
((prevVt->homogeneous[3] + prevVt->homogeneous[iAxis]) - (input[i].homogeneous[3] + input[i].homogeneous[iAxis])));
11011102
}
11021103
if (currDot > 0) {
@@ -1162,17 +1163,13 @@ static inline void sw_triangle_project_and_clip(sw_vertex_t polygon[SW_MAX_CLIPP
11621163
static inline void FUNC_NAME(const sw_texture_t* tex, const sw_vertex_t* start, \
11631164
const sw_vertex_t* end, float yDu, float yDv) \
11641165
{ \
1165-
/* Calculate the horizontal width and avoid division by zero */ \
1166-
float dx = end->screen[0] - start->screen[0]; \
1167-
if (fabsf(dx) < 1e-4f) return; \
1168-
\
11691166
/* Convert and center the screen coordinates */ \
11701167
int xStart = (int)(start->screen[0] + 0.5f); \
11711168
int xEnd = (int)(end->screen[0] + 0.5f); \
11721169
int y = (int)(start->screen[1] + 0.5f); \
11731170
\
11741171
/* Calculate the initial interpolation parameter and its increment */ \
1175-
float dt = 1.0f / dx; \
1172+
float dt = 1.0f / (end->screen[0] - start->screen[0]); \
11761173
float t = (xStart - start->screen[0]) * dt; \
11771174
\
11781175
float xDu, xDv; \
@@ -1295,20 +1292,20 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const
12951292
float x1 = v1->screen[0], y1 = v1->screen[1]; \
12961293
float x2 = v2->screen[0], y2 = v2->screen[1]; \
12971294
\
1298-
/* Reject degenerate triangles */ \
1299-
float height = y2 - y0; \
1300-
if (height < 1e-4f) return; \
1295+
/* Compute height differences */ \
1296+
float h20 = y2 - y0; \
1297+
float h10 = y1 - y0; \
1298+
float h21 = y2 - y1; \
13011299
\
1302-
/* Precompute the inverse of the triangle height and */ \
1303-
/* edge lengths with checks to avoid division by zero. */ \
1304-
float inv_height = 1.0f / height; \
1305-
float inv_y1y0 = (y1 - y0 > 1e-4f) ? 1.0f / (y1 - y0) : 0.0f; \
1306-
float inv_y2y1 = (y2 - y1 > 1e-4f) ? 1.0f / (y2 - y1) : 0.0f; \
1300+
/* Precompute the inverse heights; a height <= 1e-6 yields 0 instead of dividing */ \
1301+
float invH20 = (h20 > 1e-6f) ? 1.0f / h20 : 0.0f; \
1302+
float invH10 = (h10 > 1e-6f) ? 1.0f / h10 : 0.0f; \
1303+
float invH21 = (h21 > 1e-6f) ? 1.0f / h21 : 0.0f; \
13071304
\
13081305
/* Pre-calculation of slopes (dx/dy) */ \
1309-
float dx02 = (x2 - x0) * inv_height; \
1310-
float dx01 = (x1 - x0) * inv_y1y0; \
1311-
float dx12 = (x2 - x1) * inv_y2y1; \
1306+
float dx02 = (x2 - x0) * invH20; \
1307+
float dx01 = (x1 - x0) * invH10; \
1308+
float dx12 = (x2 - x1) * invH21; \
13121309
\
13131310
/* Y bounds (vertical clipping) */ \
13141311
int yTop = (int)(y0 + 0.5f); \
@@ -1318,8 +1315,8 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const
13181315
/* Global calculation of vertical texture gradients for the triangle */ \
13191316
float yDu, yDv; \
13201317
if (ENABLE_TEXTURE) { \
1321-
yDu = (v2->texcoord[0] - v0->texcoord[0]) * inv_height; \
1322-
yDv = (v2->texcoord[1] - v0->texcoord[1]) * inv_height; \
1318+
yDu = (v2->texcoord[0] - v0->texcoord[0]) * invH20; \
1319+
yDv = (v2->texcoord[1] - v0->texcoord[1]) * invH20; \
13231320
} \
13241321
\
13251322
/* Initializing scanline variables */ \
@@ -1328,45 +1325,71 @@ static inline void FUNC_NAME(const sw_vertex_t* v0, const sw_vertex_t* v1, const
13281325
\
13291326
/* Scanline for the upper part of the triangle */ \
13301327
for (int y = yTop; y < yMiddle; y++) { \
1328+
\
1329+
/* Discard the lines that are degenerate */ \
1330+
if (fabsf(xRight - xLeft) <= 1e-6f) { \
1331+
goto discardTL; \
1332+
} \
1333+
\
1334+
/* Calculation of interpolation factors */ \
13311335
float dy = (float)y - y0; \
1332-
float t1 = dy * inv_height; \
1333-
float t2 = dy * inv_y1y0; \
1336+
float t1 = dy * invH20; \
1337+
float t2 = dy * invH10; \
13341338
\
13351339
/* Vertex interpolation */ \
1336-
start = sw_lerp_vertex(v0, v2, t1); \
1337-
end = sw_lerp_vertex(v0, v1, t2); \
1340+
start = sw_lerp_vertex_PNTCH(v0, v2, t1); \
1341+
end = sw_lerp_vertex_PNTCH(v0, v1, t2); \
13381342
start.screen[0] = xLeft; \
13391343
start.screen[1] = (float)y; \
13401344
end.screen[0] = xRight; \
13411345
end.screen[1] = (float)y; \
13421346
\
1343-
if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \
1347+
if (xLeft > xRight) { \
1348+
sw_vertex_t tmp = start; \
1349+
start = end; \
1350+
end = tmp; \
1351+
} \
1352+
\
13441353
FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \
13451354
\
13461355
/* Incremental update */ \
1356+
discardTL: \
13471357
xLeft += dx02; \
13481358
xRight += dx01; \
13491359
} \
13501360
\
13511361
/* Scanline for the lower part of the triangle */ \
13521362
xRight = x1; /* Restart the right side from the second vertex */ \
13531363
for (int y = yMiddle; y < yBottom; y++) { \
1364+
\
1365+
/* Discard the lines that are degenerate */ \
1366+
if (fabsf(xRight - xLeft) <= 1e-6f) { \
1367+
goto discardBL; \
1368+
} \
1369+
\
1370+
/* Calculation of interpolation factors */ \
13541371
float dy = (float)y - y0; \
1355-
float t1 = dy * inv_height; \
1356-
float t2 = (float)(y - y1) * inv_y2y1; \
1372+
float t1 = dy * invH20; \
1373+
float t2 = (float)(y - y1) * invH21; \
13571374
\
13581375
/* Vertex interpolation */ \
1359-
start = sw_lerp_vertex(v0, v2, t1); \
1360-
end = sw_lerp_vertex(v1, v2, t2); \
1376+
start = sw_lerp_vertex_PNTCH(v0, v2, t1); \
1377+
end = sw_lerp_vertex_PNTCH(v1, v2, t2); \
13611378
start.screen[0] = xLeft; \
13621379
start.screen[1] = (float)y; \
13631380
end.screen[0] = xRight; \
13641381
end.screen[1] = (float)y; \
13651382
\
1366-
if (xLeft > xRight) { sw_vertex_t tmp = start; start = end; end = tmp; } \
1383+
if (xLeft > xRight) { \
1384+
sw_vertex_t tmp = start; \
1385+
start = end; \
1386+
end = tmp; \
1387+
} \
1388+
\
13671389
FUNC_SCANLINE(tex, &start, &end, yDu, yDv); \
13681390
\
13691391
/* Incremental update */ \
1392+
discardBL: \
13701393
xLeft += dx02; \
13711394
xRight += dx12; \
13721395
} \

0 commit comments

Comments
 (0)