Latest compatible version of ClassiCube from the original GitHub repository (https://github.com/ClassiCube/ClassiCube) that can be compiled for a PowerMac (PPC) running Mac OS X 10.4.
This commit is contained in:
380
misc/n64/rsp_gpu_clipping.inc
Normal file
380
misc/n64/rsp_gpu_clipping.inc
Normal file
@@ -0,0 +1,380 @@
|
||||
# Number of clip planes; GL_ClipTriangle walks one clip-flag bit per plane, up to (1 << CLIPPING_PLANE_COUNT)
#define CLIPPING_PLANE_COUNT 6
|
||||
# Number of vertex slots in CLIP_CACHE and number of entries in each of CLIP_LIST0 / CLIP_LIST1
#define CLIPPING_CACHE_SIZE 9
|
||||
# Byte stride between consecutive CLIP_PLANES entries (four halfwords per plane)
#define CLIPPING_PLANE_SIZE 8
|
||||
|
||||
.section .data.gl_clipping
|
||||
|
||||
.align 4
|
||||
# Plane coefficient table: one row of four halfwords per plane, CLIPPING_PLANE_COUNT rows.
# GL_ClipTriangle loads a row and multiplies it lane by lane with a vertex's clip-space x, y, z, w.
# GUARD_BAND_FACTOR is defined outside this file.
CLIP_PLANES:
|
||||
.half 1, 0, 0, GUARD_BAND_FACTOR
|
||||
.half 0, 1, 0, GUARD_BAND_FACTOR
|
||||
.half 0, 0, 1, 1
|
||||
.half 1, 0, 0, -GUARD_BAND_FACTOR
|
||||
.half 0, 1, 0, -GUARD_BAND_FACTOR
|
||||
.half 0, 0, 1, -1
|
||||
|
||||
.align 4
|
||||
# One entry per cache slot, holding (slot index + 1) * 2.
# GL_ClipTriangle adds these to its "used slots" map and takes the minimum to find a free slot.
CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18
|
||||
|
||||
.section .bss.gl_clipping
|
||||
|
||||
# Storage for vertices created by clipping: CLIPPING_CACHE_SIZE slots of SCREEN_VTX_SIZE bytes each.
# SCREEN_VTX_SIZE is defined outside this file.
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
||||
CLIP_CACHE_END:
|
||||
|
||||
# Two lists of halfword vertex pointers; GL_ClipTriangle swaps their input/output roles for each plane it clips against.
CLIP_LISTS:
|
||||
CLIP_LIST0: .dcb.w CLIPPING_CACHE_SIZE
|
||||
CLIP_LIST1: .dcb.w CLIPPING_CACHE_SIZE
|
||||
|
||||
|
||||
.section .text.gl_clipping
|
||||
|
||||
################################################################
|
||||
# GL_ClipTriangle
|
||||
# Clip a triangle against the view-frustum by using the Sutherland-Hodgman algorithm
|
||||
# https://en.wikipedia.org/wiki/Sutherland%E2%80%93Hodgman_algorithm
|
||||
# Args:
|
||||
# a1-a3 = Vertices
|
||||
# t5 = OR'd clip flags of the triangle's vertices
|
||||
# Returns:
|
||||
# s1 = Pointer to list of output vertices
|
||||
# s2 = Pointer to end of list
|
||||
#
|
||||
# Notes:
|
||||
#   - The output list holds halfword vertex pointers; s2 - s1 is the list size in bytes.
|
||||
#     When the triangle is clipped away completely, s2 == s1.
|
||||
#   - ra is copied to ra2 on entry and the routine returns through ra2, because it
|
||||
#     calls GL_CalcClipCodes and GL_CalcScreenSpace (both defined outside this file).
|
||||
#   - The first 32 bytes of RDPQ_CMD_STAGING are overwritten: they serve as a
|
||||
#     temporary map of which CLIP_CACHE slots are in use.
|
||||
#   - New vertices are written into CLIP_CACHE, and the SCREEN_VTX_PADDING byte of a
|
||||
#     vertex is used to remember which cache slot it occupies (zero = not in the cache).
|
||||
#   - The instruction following each branch/jump is relied upon to execute as its delay slot.
|
||||
################################################################
|
||||
.func GL_ClipTriangle
|
||||
GL_ClipTriangle:
|
||||
# Scalar register aliases.
# Several registers are shared on purpose: plane uses s2 (overwritten with the return value
# at the very end), and cache_vtx / cache_end (defined further down) reuse s3 / s5.
#define out_count v1
|
||||
#define clip_flags t5
|
||||
#define plane_flag t6
|
||||
#define in_count t7
|
||||
#define in_end t8
|
||||
#define in_list s0
|
||||
#define out_list s1
|
||||
#define plane s2
|
||||
#define intersection s3
|
||||
#define cur_ptr s4
|
||||
#define prev_ptr s5
|
||||
#define cur_vtx s6
|
||||
#define prev_vtx s7
|
||||
#define p0 k0
|
||||
#define p1 k1
|
||||
#define vtx1 a1
|
||||
#define vtx2 a2
|
||||
#define vtx3 a3
|
||||
|
||||
# Vector register aliases. The _i / _f suffixes denote the two halves of a value that is
# split over a pair of registers (loaded from the CS_POSi / CS_POSf fields respectively).
#define vplane $v01
|
||||
#define vint_f $v02
|
||||
#define vint_i $v03
|
||||
#define vdot_i $v04
|
||||
#define vdot_f $v05
|
||||
#define vdiff_i $v06
|
||||
#define vdiff_f $v07
|
||||
#define va_i $v08
|
||||
#define va_f $v09
|
||||
#define vpos_i $v10
|
||||
#define vpos_f $v11
|
||||
#define vattr0 $v12
|
||||
#define vattr1 $v13
|
||||
#define voff0 $v14
|
||||
#define voff1 $v15
|
||||
#define vcache0 $v16
|
||||
#define vcache1 $v17
|
||||
#define v__ $v29
|
||||
|
||||
# Preserve the return address; ra is overwritten by the calls made below
move ra2, ra
|
||||
|
||||
# Init in_list as empty
|
||||
li in_list, %lo(CLIP_LIST0)
|
||||
move in_count, zero
|
||||
|
||||
# Put three original vertices in the out_list
|
||||
# (So after the initial swap they will be in the in_list)
|
||||
li out_list, %lo(CLIP_LIST1)
|
||||
sh vtx1, 0(out_list)
|
||||
sh vtx2, 2(out_list)
|
||||
sh vtx3, 4(out_list)
|
||||
# List counts are kept in bytes (two bytes per entry), not in entries
li out_count, 3*2
|
||||
|
||||
li plane, %lo(CLIP_PLANES)
|
||||
li plane_flag, 1
|
||||
|
||||
# Load cache offsets
|
||||
# The first eight offsets go to voff0, the ninth to the first lane of voff1
# (voff1 is cleared beforehand so that its other lanes are zero)
li t0, %lo(CACHE_OFFSETS)
|
||||
vxor voff1, voff1
|
||||
lqv voff0, 0,t0
|
||||
lsv voff1, 16,t0
|
||||
|
||||
# Temporarily use the RDP staging area as a map of which cache slots are used
|
||||
# Init to zero
|
||||
li t0, %lo(RDPQ_CMD_STAGING)
|
||||
sqv vzero, 0,t0
|
||||
sqv vzero, 16,t0
|
||||
|
||||
# Iterate over the 6 clipping planes
|
||||
gl_clip_plane_loop:
|
||||
# Skip this plane entirely when none of the triangle's vertices has its flag set
and t0, clip_flags, plane_flag
|
||||
beqz t0, gl_clip_plane_loop_end
|
||||
# Delay slot: remember the old in_list for the swap below
move t1, in_list
|
||||
|
||||
# Swap in and out lists
|
||||
|
||||
# If the out list is empty from the last iteration,
|
||||
# the triangle has no visible points and we are done
|
||||
beqz out_count, gl_clip_return
|
||||
# Delay slot: executes on the early return as well; out_list itself is not modified by it
move in_list, out_list
|
||||
move out_list, t1
|
||||
move in_count, out_count
|
||||
move out_count, zero
|
||||
|
||||
# Iterate over the edges of the polygon in the input list
|
||||
# The current edge is between cur_vtx and prev_vtx
|
||||
move cur_ptr, in_list
|
||||
add in_end, in_list, in_count
|
||||
# Init the "previous" vertex to the last in the list for the wrap-around
|
||||
addi prev_ptr, in_end, -2
|
||||
|
||||
gl_clip_edge_loop:
|
||||
#define cur_flag t3
|
||||
#define prev_flag t4
|
||||
|
||||
# Check which side of the plane the two vertices are on
|
||||
# After masking, a non-zero flag means the vertex is outside the current plane
lhu cur_vtx, 0(cur_ptr)
|
||||
lhu prev_vtx, 0(prev_ptr)
|
||||
lbu cur_flag, SCREEN_VTX_CLIP_CODE(cur_vtx)
|
||||
lbu prev_flag, SCREEN_VTX_CLIP_CODE(prev_vtx)
|
||||
and cur_flag, plane_flag
|
||||
and prev_flag, plane_flag
|
||||
|
||||
# If they are on opposite sides, there is an intersection
|
||||
xor t0, cur_flag, prev_flag
|
||||
beqz t0, gl_clip_no_intersection
|
||||
# Delay slot
move p0, cur_vtx
|
||||
|
||||
# Swap the two points if necessary to make intersection calculation consistent
|
||||
# This will make sure p0 is always inside and p1 is always outside
|
||||
bnez prev_flag, gl_clip_no_swap
|
||||
# Delay slot
move p1, prev_vtx
|
||||
# Exchange p0 and p1 without a temporary register (XOR swap)
xor p0, p0, p1
|
||||
xor p1, p0, p1
|
||||
xor p0, p0, p1
|
||||
|
||||
#undef prev_flag
|
||||
|
||||
gl_clip_no_swap:
|
||||
# Calculate intersection of the line segment and the plane
|
||||
|
||||
# Fetch the current "used slots" map
li t0, %lo(RDPQ_CMD_STAGING)
|
||||
lqv vcache0, 0,t0
|
||||
lqv vcache1, 16,t0
|
||||
|
||||
# Repeat plane coefficients twice
|
||||
ldv vplane.e0, 0,plane
|
||||
ldv vplane.e4, 0,plane
|
||||
|
||||
# vpos: x0 y0 z0 w0 x1 y1 z1 w1
|
||||
ldv vpos_i.e0, SCREEN_VTX_CS_POSi,p0
|
||||
ldv vpos_f.e0, SCREEN_VTX_CS_POSf,p0
|
||||
ldv vpos_i.e4, SCREEN_VTX_CS_POSi,p1
|
||||
ldv vpos_f.e4, SCREEN_VTX_CS_POSf,p1
|
||||
|
||||
# vint: x1 y1 z1 w1
|
||||
ldv vint_i.e0, SCREEN_VTX_CS_POSi,p1
|
||||
ldv vint_f.e0, SCREEN_VTX_CS_POSf,p1
|
||||
|
||||
# vattr0: r0 g0 b0 a0 s0 t0
|
||||
luv vattr0.e0, SCREEN_VTX_RGBA ,p0
|
||||
llv vattr0.e4, SCREEN_VTX_S_T ,p0
|
||||
|
||||
# vattr1: r1 g1 b1 a1 s1 t1
|
||||
luv vattr1.e0, SCREEN_VTX_RGBA ,p1
|
||||
llv vattr1.e4, SCREEN_VTX_S_T ,p1
|
||||
|
||||
# Find first free slot in clip cache
|
||||
|
||||
# Add the values from the "used slots map" to the cache offsets
|
||||
# After this, each lane will contain the offset of its corresponding cache slot,
|
||||
# but only if the slot is not used. If it is used, it will contain some large value.
|
||||
vaddc vcache0, voff0
|
||||
vaddc vcache1, voff1
|
||||
|
||||
# Look for the smallest value, which will end up in vcache.e0
|
||||
# Because used slots are marked as large values, they will never be found.
|
||||
vlt vcache0, vcache0.q1
|
||||
vlt vcache0, vcache0.h2
|
||||
vlt vcache0, vcache0.e4
|
||||
vlt vcache0, vcache1.e0
|
||||
|
||||
# The value moved to t0 is a CACHE_OFFSETS entry, i.e. (slot index + 1) * 2
mfc2 t0, vcache0.e0
|
||||
|
||||
# Mark slot as used by storing some large value (careful of overflows!)
|
||||
# The -2 compensates for the offsets starting at 2, so slot n maps to halfword n of the staging area
li t1, 0xFF
|
||||
sh t1, %lo(RDPQ_CMD_STAGING)-2(t0)
|
||||
|
||||
# t0 is the index multiplied by 2
|
||||
# intersection = t0 * 20 = t0 * 16 + t0 * 4
|
||||
# NOTE(review): the factor 20 only matches the slot layout if SCREEN_VTX_SIZE is 40 - confirm
sll intersection, t0, 4
|
||||
sll t1, t0, 2
|
||||
add intersection, t1
|
||||
|
||||
# CAUTION: intersection might point to the same address as either p0 or p1,
|
||||
# because one of them is the previous point, which could have been marked unused
|
||||
# in the previous iteration. As long as we don't access p0 or p1 after writing to
|
||||
# intersection, this is fine.
|
||||
# Subtracting SCREEN_VTX_SIZE compensates for the offsets starting at one slot's worth
addi intersection, %lo(CLIP_CACHE) - SCREEN_VTX_SIZE
|
||||
|
||||
# Store the cache offset in unused memory (used later when finding the cache slot to mark as unused)
|
||||
sb t0, SCREEN_VTX_PADDING(intersection)
|
||||
|
||||
# Compute dot products of both positions with the clip plane
|
||||
# vdot.e0: d0 = dot(p0, plane)
|
||||
# vdot.e4: d1 = dot(p1, plane)
|
||||
vmudn vdot_f, vpos_f, vplane
|
||||
vmadh vdot_i, vpos_i, vplane
|
||||
vaddc vdot_f, vdot_f.q1
|
||||
vadd vdot_i, vdot_i.q1
|
||||
vaddc vdot_f, vdot_f.h2
|
||||
vadd vdot_i, vdot_i.h2
|
||||
|
||||
# d0 - d1
|
||||
vsubc vdiff_f, vdot_f, vdot_f.e4
|
||||
vsub vdiff_i, vdot_i, vdot_i.e4
|
||||
|
||||
# 1 / (d0 - d1)
|
||||
vrcph v__.e0, vdiff_i.e0
|
||||
vrcpl va_f.e0, vdiff_f.e0
|
||||
vrcph va_i.e0, vzero.e0
|
||||
|
||||
# a = d0 / (d0 - d1)
|
||||
vmudl v__, va_f, vdot_f.e0
|
||||
vmadm v__, va_i, vdot_f.e0
|
||||
vmadn va_f, va_f, vdot_i.e0
|
||||
|
||||
# Prepare 0x7FFF in va_i.e0
|
||||
# vshift8 and K1 (like vzero and K2) are constants defined outside this file
vsubc va_i, vshift8, K1
|
||||
|
||||
# a = min(a, 1)
|
||||
vge v__, va_f, vzero
|
||||
vmrg va_f, va_f, va_i.e0
|
||||
|
||||
# Account for right shift introduced by vrcp
|
||||
vmudn va_f, va_f, K2
|
||||
|
||||
# p1 - p0
|
||||
vsubc vint_f, vpos_f
|
||||
vsub vint_i, vpos_i
|
||||
# attr1 - attr0
|
||||
vsubc vattr1, vattr0
|
||||
|
||||
# Result of linear interpolation:
|
||||
# p0 + a * (p1 - p0)
|
||||
vmudl v__, vint_f, va_f.e0
|
||||
vmadm v__, vint_i, va_f.e0
|
||||
vmadn vint_f, vpos_f, K1
|
||||
vmadh vint_i, vpos_i, K1
|
||||
|
||||
# a * (attr1 - attr0)
|
||||
vmudm vattr1, vattr1, va_f.e0
|
||||
|
||||
# attr0 + a * (attr1 - attr0)
|
||||
vaddc vattr0, vattr1
|
||||
|
||||
# Store results
|
||||
sdv vint_i.e0, SCREEN_VTX_CS_POSi,intersection
|
||||
sdv vint_f.e0, SCREEN_VTX_CS_POSf,intersection
|
||||
suv vattr0.e0, SCREEN_VTX_RGBA ,intersection
|
||||
# GL_CalcClipCodes is defined outside this file; presumably it derives the clip code of the
# new vertex from the values prepared above - confirm against its definition.
# The texture coordinate store below executes in the delay slot of the call.
jal GL_CalcClipCodes
|
||||
slv vattr0.e4, SCREEN_VTX_S_T ,intersection
|
||||
|
||||
# Add intersection to the output list
|
||||
add t0, out_list, out_count
|
||||
sh intersection, 0(t0)
|
||||
addi out_count, 2
|
||||
|
||||
gl_clip_no_intersection:
|
||||
# If cur_vtx is inside, add it to the output list
|
||||
bnez cur_flag, gl_clip_no_current
|
||||
# Delay slot: t0 is only consumed on the fall-through path
add t0, out_list, out_count
|
||||
sh cur_vtx, 0(t0)
|
||||
b gl_clip_edge_loop_end
|
||||
# Delay slot
addi out_count, 2
|
||||
|
||||
#undef cur_flag
|
||||
|
||||
gl_clip_no_current:
|
||||
# Check if the vertex is stored in the clip cache
|
||||
# A non-zero padding byte is the cache offset that was stored when the vertex was created
lbu t0, SCREEN_VTX_PADDING(cur_vtx)
|
||||
beqz t0, gl_clip_edge_loop_end
|
||||
# Reset the padding field to zero, so the screen space values won't be recalculated below
|
||||
# (This store sits in the delay slot, so it also runs when the branch is taken; the byte is already zero then)
sb zero, SCREEN_VTX_PADDING(cur_vtx)
|
||||
# If so, mark it as unused
|
||||
sh zero, %lo(RDPQ_CMD_STAGING)-2(t0)
|
||||
|
||||
gl_clip_edge_loop_end:
|
||||
# Advance to the next edge
|
||||
addi cur_ptr, 2
|
||||
blt cur_ptr, in_end, gl_clip_edge_loop
|
||||
# Delay slot: the vertex just processed becomes the "previous" one
addi prev_ptr, cur_ptr, -2
|
||||
|
||||
gl_clip_plane_loop_end:
|
||||
# Advance to the next clipping plane
|
||||
sll plane_flag, 1
|
||||
blt plane_flag, (1<<CLIPPING_PLANE_COUNT), gl_clip_plane_loop
|
||||
# Delay slot
addi plane, CLIPPING_PLANE_SIZE
|
||||
|
||||
# These reuse the registers of intersection (s3) and prev_ptr (s5), which are no longer needed
#define cache_vtx s3
|
||||
#define cache_end s5
|
||||
|
||||
# Calculate screen space values for new vertices (in the clip cache)
|
||||
# TODO: maybe iterate over out_list instead
|
||||
li cache_vtx, %lo(CLIP_CACHE)
|
||||
# The loop condition is tested before the delay-slot increment, hence the end pointer is the last slot
li cache_end, %lo(CLIP_CACHE_END) - SCREEN_VTX_SIZE
|
||||
gl_clip_finalize_loop:
|
||||
# Negate the padding byte so that "slot in use" (non-zero) becomes a negative value for bltzal
lbu t0, SCREEN_VTX_PADDING(cache_vtx)
|
||||
neg t0
|
||||
|
||||
# Only calculate screen space values if the vertex is actually used
|
||||
# GL_CalcScreenSpace is defined outside this file; the position is loaded into vint_i / vint_f
# before (and in the delay slot of) the conditional call
ldv vint_i, SCREEN_VTX_CS_POSi,cache_vtx
|
||||
bltzal t0, GL_CalcScreenSpace
|
||||
ldv vint_f, SCREEN_VTX_CS_POSf,cache_vtx
|
||||
|
||||
blt cache_vtx, cache_end, gl_clip_finalize_loop
|
||||
# Delay slot
addi cache_vtx, SCREEN_VTX_SIZE
|
||||
|
||||
gl_clip_return:
|
||||
# Done!
|
||||
# The end-of-list pointer (s2) is computed in the delay slot of the return.
# On the early-out path out_count is zero, so s2 equals s1.
jr ra2
|
||||
add s2, out_list, out_count
|
||||
|
||||
# NOTE(review): va_i, voff0, voff1, vcache0 and vcache1 are defined above but have no
# matching #undef in the list below - confirm whether that is intentional.
#undef cache_vtx
|
||||
#undef cache_end
|
||||
#undef clip_flags
|
||||
#undef plane_flag
|
||||
#undef in_count
|
||||
#undef out_count
|
||||
#undef in_end
|
||||
#undef intersection
|
||||
#undef in_list
|
||||
#undef out_list
|
||||
#undef plane
|
||||
#undef cur_ptr
|
||||
#undef prev_ptr
|
||||
#undef cur_vtx
|
||||
#undef prev_vtx
|
||||
#undef p0
|
||||
#undef p1
|
||||
#undef vtx1
|
||||
#undef vtx2
|
||||
#undef vtx3
|
||||
#undef vplane
|
||||
#undef vpos_i
|
||||
#undef vpos_f
|
||||
#undef vdot_i
|
||||
#undef vdot_f
|
||||
#undef vdiff_i
|
||||
#undef vdiff_f
|
||||
#undef va_f
|
||||
#undef vint_i
|
||||
#undef vint_f
|
||||
#undef vattr0
|
||||
#undef vattr1
|
||||
#undef v__
|
||||
|
||||
.endfunc
|
||||
Reference in New Issue
Block a user