Latest compatible version of Classicube from the original GitHub repository (https://github.com/ClassiCube/ClassiCube) that can be compiled on Classicube for PowerMac PPC running Mac OS X 10.4.
This commit is contained in:
96
misc/dreamcast/Makefile
Normal file
96
misc/dreamcast/Makefile
Normal file
@@ -0,0 +1,96 @@
|
||||
# Refuse to continue unless the KallistiOS environment script has been sourced
ifeq "$(strip $(KOS_BASE))" ""
  $(warning Please set KOS variables in your environment. For example:)
  $(warning source /opt/toolchains/dc/kos/environ.sh)
  $(error Failed to find KallistiOS installation)
endif
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
# Build settings
#---------------------------------------------------------------------------------
# Output directory for object (.o) and dependency (.d) files
BUILD_DIR   := build/dc
# Directories scanned (non-recursively) for .c and .S sources
SOURCE_DIRS := src third_party/bearssl/src misc/dreamcast
# Base name shared by the .elf / .bin / .iso / .cdi outputs
TARGET      := ClassiCube-dc
# Libraries appended to the link command
LIBS         = -lm -lppp -lkosfat
# Extra header search paths passed to the compiler
INCLUDES     = -Ithird_party/bearssl/inc
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
# Source discovery and compiler flags
#---------------------------------------------------------------------------------
# Assembly and C sources found directly inside each source directory
S_FILES := $(wildcard $(addsuffix /*.S,$(SOURCE_DIRS)))
C_FILES := $(wildcard $(addsuffix /*.c,$(SOURCE_DIRS)))
# Every object lands flat in BUILD_DIR, named after the source file stem
OBJS    := $(patsubst %,$(BUILD_DIR)/%.o,$(basename $(notdir $(C_FILES) $(S_FILES))))
CFLAGS  := -g -DNDEBUG -O3 -fipa-pta -fno-pie -flto=auto -fomit-frame-pointer -fbuiltin -ffast-math -ffp-contract=fast -mfsrra -mfsca -pipe -fno-math-errno
LDFLAGS  = -g

# Compiler-generated dependency files.
# DEPFLAGS stays recursively expanded: $@ and $* only have a value inside a recipe.
DEPFLAGS = -MT $@ -MMD -MP -MF $(BUILD_DIR)/$*.d
DEPFILES := $(OBJS:.o=.d)
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
# Main targets
#---------------------------------------------------------------------------------
# default and clean name actions, not files; declaring them phony keeps them
# working even if a file called "default" or "clean" appears in the tree
.PHONY: default clean

# Build the bootable disc image (and make sure the object directory exists)
default: $(BUILD_DIR) $(TARGET).cdi

# Remove everything the build produces. rm -f so that cleaning a partial
# build or an already clean tree is not an error
clean:
	rm -f $(TARGET).cdi $(TARGET).iso $(TARGET).elf $(TARGET)-scr.bin $(TARGET).bin $(OBJS) $(DEPFILES)

$(BUILD_DIR):
	mkdir -p $(BUILD_DIR)
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
# Executable generation
#---------------------------------------------------------------------------------
# Link all objects into the ELF executable
$(TARGET).elf: $(OBJS)
	kos-cc $(LDFLAGS) $^ -o $@ $(LIBS)

# Flatten the ELF into a raw binary, dropping the .stack section
$(TARGET).bin: $(TARGET).elf
	sh-elf-objcopy -R .stack -O binary $< $@

# Scramble the binary so it can be used as 1ST_READ.BIN
# https://dcemulation.org/phpBB/viewtopic.php?t=105269
$(TARGET)-scr.bin: $(TARGET).bin
	$(KOS_BASE)/utils/scramble/scramble $< $@

# Stage the disc contents in ISO_FILES and master the ISO.
# The texture pack and IP.BIN are listed as prerequisites so that editing
# either one rebuilds the image
$(TARGET).iso: $(TARGET)-scr.bin misc/dreamcast/classicube.zip misc/dreamcast/IP.BIN
	mkdir -p ISO_FILES/audio ISO_FILES/maps ISO_FILES/texpacks ISO_FILES/texturecache
	cp $< ISO_FILES/1ST_READ.BIN
	cp misc/dreamcast/classicube.zip ISO_FILES/texpacks/default.zip
	cp misc/dreamcast/IP.BIN IP.BIN
	mkisofs -G IP.BIN -C 0,11702 -J -l -r -quiet -o $@ ISO_FILES
# Alternative for systems that ship genisoimage instead of mkisofs:
#	genisoimage -V ClassiCube -G IP.BIN -joliet -rock -l -o $(TARGET).iso ISO_FILES

# Convert the ISO into a DiscJuggler image
$(TARGET).cdi: $(TARGET).iso
	cdi4dc $< $@
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
# Object generation
#---------------------------------------------------------------------------------
# Each rule lists $(BUILD_DIR) as an order-only prerequisite: the directory must
# exist before the compiler writes into it (otherwise make -j can race the
# mkdir), but its timestamp never forces a recompile.

# Game sources
$(BUILD_DIR)/%.o: src/%.c | $(BUILD_DIR)
	kos-cc $(CFLAGS) $(INCLUDES) $(DEPFLAGS) -c $< -o $@

# BearSSL sources (DEPFLAGS added so header edits trigger a rebuild here too)
$(BUILD_DIR)/%.o: third_party/bearssl/src/%.c | $(BUILD_DIR)
	kos-cc $(CFLAGS) $(INCLUDES) $(DEPFLAGS) -c $< -o $@

# Hand-written assembly, preprocessed by the compiler driver
$(BUILD_DIR)/%.o: misc/dreamcast/%.S | $(BUILD_DIR)
	kos-cc $(DEPFLAGS) -c $< -o $@
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
# Header dependency files
#---------------------------------------------------------------------------------
# Declare every .d file as a target with no prerequisites and no recipe
$(DEPFILES):

# Read only the dependency files that already exist (none on a first build)
include $(wildcard $(DEPFILES))
|
||||
196
misc/dreamcast/VertexClip.S
Normal file
196
misc/dreamcast/VertexClip.S
Normal file
@@ -0,0 +1,196 @@
|
||||
! r1 = v1, CLOBBERS
|
||||
! r2 = v2, CLOBBERS
|
||||
! r3 = OUT, CLOBBERS
|
||||
! r4 = TMP 2, preserved
|
||||
! r5 = CLO (output colour), preserved
|
||||
! r0 = TMP 1, CLOBBERS
|
||||
|
||||
! FR0 = 0
|
||||
! FR1 = 0
|
||||
! FR2 = A.1
|
||||
! FR3 = B.1
|
||||
! FR4 = 0
|
||||
! FR5 = 0
|
||||
! FR6 = A.2
|
||||
! FR7 = B.2
|
||||
! FR8 = 0
|
||||
! FR9 = 0
|
||||
! FR10 = invT
|
||||
! FR11 = t
|
||||
|
||||
#define TM1 r0 // scratch 1, not preserved
#define TM2 r4 // scratch 2, saved and restored by _ClipLine
#define CLO r5 // packed output colour, saved and restored by _ClipLine

#define IN1 r1 // pointer into input vertex 1, not preserved
#define IN2 r2 // pointer into input vertex 2, not preserved
#define OUT r3 // pointer into output vertex, not preserved

#define CL1 r1 // packed colour of vertex 1 (reuses the IN1 register)
#define CL2 r2 // packed colour of vertex 2 (reuses the IN2 register)
|
||||
|
||||
! Calculates the near plane intersection point between two points:
|
||||
! float t = fabsf(v1->z) / fabsf(v2->z - v1->z)
|
||||
! float invt = 1.0f - t;
|
||||
!
|
||||
! out->x = invt * v1->x + t * v2->x;
|
||||
! out->y = invt * v1->y + t * v2->y;
|
||||
! out->z = 0.0f; // clipped against near plane anyways (I.e Z/W = 0 --> Z = 0)
|
||||
!
|
||||
! out->u = invt * v1->u + t * v2->u;
|
||||
! out->v = invt * v1->v + t * v2->v;
|
||||
! out->w = invt * v1->w + t * v2->w;
|
||||
!
|
||||
! out->b = invt * v1->b + t * v2->b;
|
||||
! out->g = invt * v1->g + t * v2->g;
|
||||
! out->r = invt * v1->r + t * v2->r;
|
||||
! out->a = invt * v1->a + t * v2->a;
|
||||
! To optimise these calculations, FIPR is used:
|
||||
! FIPR = FVm.x*FVn.x + FVm.y*FVn.y + FVm.z*FVn.z + FVm.w*FVn.w --> FVn.w
|
||||
! FIPR can be used to accomplish "vout->Q = invt * v1->Q + t * v2->Q" by:
|
||||
! - assigning x/y components to 0 for both vectors
|
||||
! - assigning invT and t to z/w of FVm vector
|
||||
! - assigning v1 and v2 to z/w of FVn vector
|
||||
! FIPR = 0*0 + 0*0 + invT*v1->Q + t*v2->Q --> FVn.w
|
||||
! FIPR = invT*v1->Q + t*v2->Q --> FVn.w
|
||||
|
||||
! _ClipLine: writes to OUT the point where the edge v1 -> v2 meets the near plane.
!   IN1 (r1) = v1, IN2 (r2) = v2, OUT (r3) = destination vertex
! Non-standard register interface: r0-r3 are clobbered, r4/r5 are saved and restored.
! Byte offsets used below: x=4, y=8, z=12, u=16, v=20, bgra=24, w=28.
! The word at offset 0 of OUT is left untouched.
.global _ClipLine
.align 4
_ClipLine:
        mov.l   TM2, @-r15              ! save r4
        mov.l   CLO, @-r15              ! save r5

! ---- interpolation factor: fv8 = (0, 0, invT, t), fv0/fv4 x,y lanes = 0 ----
        mov     IN1, TM1
        fldi0   fr4                     ! fv4.x = 0
        add     #12, TM1                ! tmp = address of v1->z
        fmov.s  @TM1, fr2               ! fr2 = v1->z
        mov     IN2, TM1
        fldi0   fr5                     ! fv4.y = 0
        add     #12, TM1                ! tmp = address of v2->z
        fmov.s  @TM1, fr11              ! fr11 = v2->z
        fsub    fr2, fr11               ! fr11 = v2->z - v1->z
        fldi0   fr8                     ! fv8.x = 0
        fmul    fr11, fr11              ! fr11 = (v2->z - v1->z) squared
        fldi0   fr9                     ! fv8.y = 0
        fldi0   fr0                     ! fv0.x = 0
        fldi0   fr1                     ! fv0.y = 0 (also stored as OUT->z later)
        fsrra   fr11                    ! fr11 = 1 / sqrt(fr11) = 1 / abs(v2->z - v1->z)
        fabs    fr2                     ! fr2 = abs(v1->z)
        fmul    fr2, fr11               ! fr11 = t
        add     #4, IN1                 ! v1 cursor -> x
        fldi1   fr10
        add     #4, IN2                 ! v2 cursor -> x
        add     #4, OUT                 ! out cursor -> x
        fsub    fr11, fr10              ! fr10 = 1 - t = invT

! ---- x and y ----
        fmov.s  @IN1+, fr2              ! v1->x
        fmov.s  @IN2+, fr3              ! v2->x
        fipr    fv8, fv0                ! fr3 = invT * v1->x + t * v2->x
        fmov.s  @IN1+, fr6              ! v1->y
        fmov.s  @IN2+, fr7              ! v2->y

        fmov.s  fr3, @OUT               ! OUT->x
        add     #4, OUT
        fipr    fv8, fv4                ! fr7 = invT * v1->y + t * v2->y
        add     #4, IN1                 ! skip z
        add     #4, IN2                 ! skip z

        fmov.s  fr7, @OUT               ! OUT->y
        add     #4, OUT
        fmov.s  fr1, @OUT               ! OUT->z = 0
        add     #4, OUT

! ---- u and v ----
        fmov.s  @IN1+, fr2              ! v1->u
        fmov.s  @IN2+, fr3              ! v2->u
        fipr    fv8, fv0                ! fr3 = interpolated u
        fmov.s  @IN1+, fr6              ! v1->v
        fmov.s  @IN2+, fr7              ! v2->v

        fmov.s  fr3, @OUT               ! OUT->u
        add     #4, OUT
        fipr    fv8, fv4                ! fr7 = interpolated v
        add     #4, IN1                 ! step over bgra to w
        add     #4, IN2                 ! step over bgra to w
        fmov.s  @IN1, fr2               ! v1->w
        fmov.s  @IN2, fr3               ! v2->w
        fmov.s  fr7, @OUT               ! OUT->v
        add     #8, OUT                 ! out cursor -> w

! ---- w ----
        fipr    fv8, fv0                ! fr3 = interpolated w
        add     #-4, IN1                ! back to bgra
        add     #-4, IN2                ! back to bgra
        fmov.s  fr3, @OUT               ! OUT->w
        add     #-4, OUT                ! out cursor -> bgra

! ---- colour: copied as-is when both ends match, otherwise lerped per byte ----
        mov.l   @IN1, CL1               ! colour A (overwrites the v1 pointer)
        mov.l   @IN2, CL2               ! colour B (overwrites the v2 pointer)
        cmp/eq  CL1, CL2
        bt.s    1f                      ! identical colours: skip interpolation
        mov     CL1, CLO                ! (delay slot) result = colour A

        ! byte 0 (B)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, fr2               ! fr2 = A.b
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, fr3               ! fr3 = B.b
        fipr    fv8, fv0                ! fr3 = lerp
        shlr8   CL1                     ! expose next byte of A
        ftrc    fr3, fpul               ! truncate to integer
        shlr8   CL2                     ! expose next byte of B
        sts     fpul, TM2               ! TM2 = pending B result

        ! byte 1 (G)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, fr2               ! fr2 = A.g
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, fr3               ! fr3 = B.g
        fipr    fv8, fv0                ! fr3 = lerp
        shlr8   CL1
        ftrc    fr3, fpul
        extu.b  TM2, TM2                ! clamp pending B result to 8 bits
        mov     TM2, CLO                ! result = B byte
        shlr8   CL2
        sts     fpul, TM2               ! TM2 = pending G result

        ! byte 2 (R)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, fr2               ! fr2 = A.r
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, fr3               ! fr3 = B.r
        fipr    fv8, fv0                ! fr3 = lerp
        shlr8   CL1
        ftrc    fr3, fpul
        extu.b  TM2, TM2
        shll8   TM2
        or      TM2, CLO                ! result |= G byte << 8
        shlr8   CL2
        sts     fpul, TM2               ! TM2 = pending R result

        ! byte 3 (A)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, fr2               ! fr2 = A.a
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, fr3               ! fr3 = B.a
        fipr    fv8, fv0                ! fr3 = lerp
        ftrc    fr3, fpul
        extu.b  TM2, TM2
        shll16  TM2
        or      TM2, CLO                ! result |= R byte << 16
        sts     fpul, TM2               ! TM2 = A result
        extu.b  TM2, TM2
        shll16  TM2
        shll8   TM2
        or      TM2, CLO                ! result |= A byte << 24

1:
        mov.l   CLO, @OUT               ! OUT->bgra
        mov.l   @r15+, r5               ! restore r5
        rts
        mov.l   @r15+, r4               ! (delay slot) restore r4

.size _ClipLine, .-_ClipLine
.type _ClipLine,%function
|
||||
210
misc/dreamcast/VertexClip2.S
Normal file
210
misc/dreamcast/VertexClip2.S
Normal file
@@ -0,0 +1,210 @@
|
||||
! Calculates vertex as the near plane intersection point between two points:
|
||||
! float t = fabsf(v1->z) / fabsf(v2->z - v1->z)
|
||||
! float w = (1 - t) * v1->w + t * v2->w;
|
||||
!
|
||||
! out->c = type << 24
|
||||
! out->x = ((1 - t) * v1->x + t * v2->x) * 1/w
|
||||
! out->y = ((1 - t) * v1->y + t * v2->y) * 1/w
|
||||
! out->w = 1/w
|
||||
!
|
||||
! out->u = (1 - t) * v1->u + t * v2->u;
|
||||
! out->v = (1 - t) * v1->v + t * v2->v;
|
||||
!
|
||||
! out->b = (1 - t) * v1->b + t * v2->b;
|
||||
! out->g = (1 - t) * v1->g + t * v2->g;
|
||||
! out->r = (1 - t) * v1->r + t * v2->r;
|
||||
! out->a = (1 - t) * v1->a + t * v2->a;
|
||||
|
||||
! Optimisations:
|
||||
! - w always ends up being zNear
|
||||
! - Calculations of (1 - t) * v1 + t * v2 can be rearranged to t * (v2 - v1) + v1
|
||||
! - These rearranged calculations can then take advantage of FMAC
|
||||
|
||||
! Final calculation:
! out->c = type << 24
! out->x = (t * (v2->x - v1->x) + v1->x) * 1/zNear
! out->y = (t * (v2->y - v1->y) + v1->y) * 1/zNear
! out->w = 1/zNear
!
! out->u = t * (v2->u - v1->u) + v1->u;
! out->v = t * (v2->v - v1->v) + v1->v;
!
! out->b = t * (v2->b - v1->b) + v1->b;
! out->g = t * (v2->g - v1->g) + v1->g;
! out->r = t * (v2->r - v1->r) + v1->r;
! out->a = t * (v2->a - v1->a) + v1->a;
|
||||
|
||||
! INPUT ARGUMENTS
|
||||
#define IN1 r4 // argument 1: input vertex 1
#define IN2 r5 // argument 2: input vertex 2
#define OUT r6 // argument 3: output vertex
#define TYP r7 // argument 4: type/flags for the output vertex

#define TM1 r1 // scratch 1
#define TM2 r3 // scratch 2
#define CL1 r4 // packed colour 1 (reuses the IN1 register)
#define CL2 r5 // packed colour 2 (reuses the IN2 register)
#define CLO r7 // packed output colour (reuses the TYP register)

#define F_T  fr0
#define F_W  fr1
#define F_X1 fr2
#define F_X2 fr3
#define F_Y1 fr4
#define F_Y2 fr5
#define F_U1 fr6
#define F_U2 fr7
#define F_V1 fr8
#define F_V2 fr9
#define F_Z1 fr10
#define F_Z2 fr11
#define Ftmp fr11 // deliberately the same register as F_Z2
|
||||
|
||||
! _ClipEdge: builds the vertex where the edge v1 -> v2 meets the near plane.
!   IN1 (r4) = v1, IN2 (r5) = v2, OUT (r6) = destination, TYP (r7) = vertex type
! Byte offsets used below: cmd=0, x=4, y=8, w=12, u=16, v=20, bgra=24, z=28 (input only).
! Clobbers r1, r3-r7 and fr0-fr11.
.global _ClipEdge
.align 4
_ClipEdge:
        ! Toggles FPSCR.SZ. The original note describes this as switching to
        ! 32 bit transfers, which presumes the caller runs with SZ set - TODO confirm
        fschg

! ---- start computing t, fetch 1/zNear, write the command word ----
        add     #28, IN1                ! IN1 -> v1->z
        mov.l   _NEAR_CLIP_W, TM1       ! raw bits of the float stored at _NEAR_CLIP_W
        fmov.s  @IN1, F_Z1              ! Z1 = v1->z
        add     #28, IN2                ! IN2 -> v2->z
        fmov.s  @IN2, F_Z2              ! Z2 = v2->z
        shll16  TYP
        fsub    F_Z1, Ftmp              ! tmp = v2->z - v1->z
        lds     TM1, fpul
        add     #-24, IN1               ! IN1 -> v1->x
        fsts    fpul, F_W               ! W = value of _NEAR_CLIP_W
        add     #-24, IN2               ! IN2 -> v2->x
        shll8   TYP                     ! TYP is now shifted left by 24 in total
        fmul    Ftmp, Ftmp              ! tmp = (v2->z - v1->z) squared
        mov.l   TYP, @OUT               ! OUT->cmd = type << 24

! ---- load both endpoints ----
        fmov.s  @IN1+, F_X1             ! v1->x
        fmov.s  @IN2+, F_X2             ! v2->x
        fmov.s  @IN1+, F_Y1             ! v1->y
        fmov.s  @IN2+, F_Y2             ! v2->y
        fsrra   Ftmp                    ! tmp = 1 / abs(v2->z - v1->z)
        add     #4, IN1                 ! skip w
        fabs    F_Z1                    ! Z1 = abs(v1->z)
        add     #4, IN2                 ! skip w
        fmov.s  @IN1+, F_U1             ! v1->u
        fmov.s  @IN2+, F_U2             ! v2->u
        fmov.s  @IN1+, F_V1             ! v1->v
        fmul    F_Z1, Ftmp              ! tmp = abs(v1->z) / abs(v2->z - v1->z) = t
        fmov.s  @IN2+, F_V2             ! v2->v

! ---- q = t * (q2 - q1) + q1 for x, y, u, v ----
        fsub    F_X1, F_X2              ! X2 = X2 - X1
        fsub    F_Y1, F_Y2              ! Y2 = Y2 - Y1
        fsub    F_U1, F_U2              ! U2 = U2 - U1
        fmov    Ftmp, F_T               ! fmac needs the factor in fr0
        fsub    F_V1, F_V2              ! V2 = V2 - V1

        fmac    F_T, F_X2, F_X1         ! X1 = t * (X2 - X1) + X1
        fmac    F_T, F_Y2, F_Y1         ! Y1 = t * (Y2 - Y1) + Y1
        fmac    F_T, F_U2, F_U1         ! U1 = t * (U2 - U1) + U1
        fmac    F_T, F_V2, F_V1         ! V1 = t * (V2 - V1) + V1

! ---- scale x and y by W ----
        fmul    F_W, F_X1               ! x = W * x
        fmul    F_W, F_Y1               ! y = W * y

! ---- colour: copied as-is when both ends match, otherwise lerped per byte ----
        mov.l   @IN1, CL1               ! colour A (overwrites the v1 pointer)
        mov.l   @IN2, CL2               ! colour B (overwrites the v2 pointer)
        cmp/eq  CL1, CL2
        add     #28, OUT                ! OUT -> offset 28, stores below pre-decrement

        bt.s    1f                      ! identical colours: skip interpolation
        mov     CL1, CLO                ! (delay slot) result = colour A

        ! byte 0 (B)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, F_Z1              ! C1 = A.b
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, F_Z2              ! C2 = B.b
        fsub    F_Z1, F_Z2              ! C2 = C2 - C1
        fmac    F_T, F_Z2, F_Z1         ! C1 = t * (C2 - C1) + C1
        shlr8   CL1                     ! expose next byte of A
        shlr8   CL2                     ! expose next byte of B
        ftrc    F_Z1, fpul              ! truncate to integer
        sts     fpul, TM2               ! TM2 = pending B result

        ! byte 1 (G)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, F_Z1              ! C1 = A.g
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, F_Z2              ! C2 = B.g
        fsub    F_Z1, F_Z2
        fmac    F_T, F_Z2, F_Z1
        shlr8   CL1
        extu.b  TM2, TM2                ! clamp pending B result to 8 bits
        mov     TM2, CLO                ! result = B byte
        shlr8   CL2
        ftrc    F_Z1, fpul
        sts     fpul, TM2               ! TM2 = pending G result

        ! byte 2 (R)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, F_Z1              ! C1 = A.r
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, F_Z2              ! C2 = B.r
        fsub    F_Z1, F_Z2
        fmac    F_T, F_Z2, F_Z1
        shlr8   CL1
        extu.b  TM2, TM2
        shll8   TM2
        or      TM2, CLO                ! result |= G byte << 8
        shlr8   CL2
        ftrc    F_Z1, fpul
        sts     fpul, TM2               ! TM2 = pending R result

        ! byte 3 (A)
        extu.b  CL1, TM1
        lds     TM1, fpul
        float   fpul, F_Z1              ! C1 = A.a
        extu.b  CL2, TM1
        lds     TM1, fpul
        float   fpul, F_Z2              ! C2 = B.a
        fsub    F_Z1, F_Z2
        fmac    F_T, F_Z2, F_Z1
        extu.b  TM2, TM2
        shll16  TM2
        or      TM2, CLO                ! result |= R byte << 16
        ftrc    F_Z1, fpul
        sts     fpul, TM2               ! TM2 = A result
        extu.b  TM2, TM2
        shll16  TM2
        shll8   TM2
        or      TM2, CLO                ! result |= A byte << 24

1:
! ---- store the vertex back to front, from offset 24 down to offset 4 ----
        mov.l   CLO, @-OUT              ! OUT->bgra
        fmov.s  F_V1, @-OUT             ! OUT->v
        fmov.s  F_U1, @-OUT             ! OUT->u
        fmov.s  F_W, @-OUT              ! OUT->w
        fmov.s  F_Y1, @-OUT             ! OUT->y
        fmov.s  F_X1, @-OUT             ! OUT->x

        fschg                           ! toggle FPSCR.SZ back to its entry state
        rts
        pref    @OUT                    ! (delay slot) original note: triggers the store queue flush
.size _ClipEdge, .-_ClipEdge
.type _ClipEdge, %function

! Float read by _ClipEdge and copied into OUT->w; exported so other code can set it.
! The original notes call it invW (1/zNear).
.align 4
_NEAR_CLIP_W:
        .float 0
.global _NEAR_CLIP_W
|
||||
611
misc/dreamcast/VertexDraw.S
Normal file
611
misc/dreamcast/VertexDraw.S
Normal file
@@ -0,0 +1,611 @@
|
||||
! r8 = STORE_QUEUE
|
||||
! r9 = num vertices left
|
||||
! r10 = PVR_CMD_VERTEX
|
||||
! r11 = PVR_CMD_VERTEX_EOL
|
||||
! r12 = ClipLine function
|
||||
! r13 = cur vertex
|
||||
! r14 = next vertex (prefetch)
|
||||
|
||||
! Two spellings exist for each command register
#define R_VTX        r10
#define R_EOL        r11
#define REG_CMD_VTX  r10
#define REG_CMD_EOL  r11
! Address of the clipping routine called by the quad cases
#define REG_CLIPFUNC r12
|
||||
|
||||
|
||||
.align 4
|
||||
|
||||
! PushVertex R: copies the 32 bytes at R to the store queue address in r8,
! issues pref on r8 and then advances r8 by 32.
!   Inputs:   R = source pointer (left unchanged), r8 = destination
!   Clobbers: r2
.macro PushVertex R
        mov.l   @(0,\R),  r2
        mov.l   r2, @(0,r8)
        mov.l   @(4,\R),  r2
        mov.l   r2, @(4,r8)
        mov.l   @(8,\R),  r2
        mov.l   r2, @(8,r8)
        mov.l   @(12,\R), r2
        mov.l   r2, @(12,r8)
        mov.l   @(16,\R), r2
        mov.l   r2, @(16,r8)
        mov.l   @(20,\R), r2
        mov.l   r2, @(20,r8)
        mov.l   @(24,\R), r2
        mov.l   r2, @(24,r8)
        mov.l   @(28,\R), r2
        mov.l   r2, @(28,r8)
        pref    @r8                     ! trigger the store queue
        add     #32, r8                 ! next 32 byte slot
.endm
|
||||
|
||||
! TransformVertex R: perspective-divides the vertex at R and writes it to the
! store queue slot at r8 (original note: vertices are already viewport transformed).
!   invW = 1 / sqrt(w * w);  dst x = invW * x;  dst y = invW * y;  dst z = invW
!   flags, u, v and bgra are copied unchanged
!   Inputs:   R = source vertex, r8 = destination
!   Outputs:  r8 advanced by 32; R ends up back at its entry value
!   Clobbers: r2, fr0, fr4
.macro TransformVertex R
        ! inverse W, interleaved with the flags copy
        add     #28, \R                 ! R -> w
        fmov.s  @\R, fr0                ! fr0 = w
        fmul    fr0, fr0                ! fr0 = w * w
        add     #-28, \R                ! R -> flags
        mov.l   @\R+, r2                ! r2 = flags, R -> x
        mov.l   r2, @r8                 ! dst flags
        fsrra   fr0                     ! fr0 = invW
        add     #4, r8                  ! r8 -> dst x

        ! u and v (both cursors sit at offset 4)
        mov.l   @(12,\R), r2
        mov.l   r2, @(12,r8)            ! dst u
        mov.l   @(16,\R), r2
        mov.l   r2, @(16,r8)            ! dst v

        ! x, with the colour copy in between
        fmov.s  @\R, fr4                ! fr4 = x
        fmul    fr0, fr4                ! fr4 = invW * x
        mov.l   @(20,\R), r2
        mov.l   r2, @(20,r8)            ! dst bgra
        add     #4, \R                  ! R -> y
        fmov.s  fr4, @r8                ! dst x

        ! y and z
        fmov.s  @\R, fr4                ! fr4 = y
        add     #8, r8                  ! r8 -> dst z
        fmul    fr0, fr4                ! fr4 = invW * y
        fmov.s  fr0, @r8                ! dst z = invW
        add     #-4, r8                 ! r8 -> dst y
        add     #-8, \R                 ! R back to the start of the vertex
        fmov.s  fr4, @r8                ! dst y

        add     #-8, r8                 ! r8 back to the start of the slot
        pref    @r8                     ! trigger the store queue
        add     #32, r8                 ! next 32 byte slot
.endm
|
||||
|
||||
|
||||
! Inputs of the clipping routine
#define REG_CLIP1 r1
#define REG_CLIP2 r2

! The four vertices of the quad being drawn
#define REG_V0 r4
#define REG_V1 r5
#define REG_V2 r6
#define REG_V3 r7

! Scratch vertex pointer; r3 is also the output parameter of ClipLine
#define REG_TMP r3
! Point REG_TMP at scratch vertex A (r15 + 0) or B (r15 + 32)
#define TMP_SET_A mov r15, REG_TMP
#define TMP_SET_B mov r15, REG_TMP; add #32, REG_TMP
|
||||
|
||||
|
||||
! Quad case: only V0 is visible.
!   A = clip(V3, V0), tagged with the EOL command
!   B = clip(V0, V1), tagged with the VTX command
! Submits: V0, B, A
_Case_0_0_0_1:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V3, REG_CLIP1
        mov     REG_V0, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V0, REG_CLIP1
        mov     REG_V1, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) B.flags

        TransformVertex REG_V0
        TMP_SET_B
        TransformVertex REG_TMP
        TMP_SET_A
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: only V1 is visible.
!   A = clip(V0, V1), tagged with the VTX command
!   B = clip(V1, V2), tagged with the EOL command
! Submits: A, V1, B
_Case_0_0_1_0:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V0, REG_CLIP1
        mov     REG_V1, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V1, REG_CLIP1
        mov     REG_V2, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) B.flags

        TMP_SET_A
        TransformVertex REG_TMP
        TransformVertex REG_V1
        TMP_SET_B
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: only V2 is visible.
!   A = clip(V1, V2), tagged with the VTX command
!   B = clip(V2, V3), tagged with the EOL command
! Submits: A, V2, B
_Case_0_1_0_0:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V1, REG_CLIP1
        mov     REG_V2, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V2, REG_CLIP1
        mov     REG_V3, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) B.flags

        TMP_SET_A
        TransformVertex REG_TMP
        TransformVertex REG_V2
        TMP_SET_B
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: only V3 is visible.
!   A = clip(V2, V3), tagged with the VTX command
!   B = clip(V3, V0), tagged with the VTX command
! Submits: B, A, V3 (V3 goes last and keeps its own flags)
_Case_1_0_0_0:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V2, REG_CLIP1
        mov     REG_V3, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V3, REG_CLIP1
        mov     REG_V0, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) B.flags

        TMP_SET_B
        TransformVertex REG_TMP
        TMP_SET_A
        TransformVertex REG_TMP
        TransformVertex REG_V3

        lds     r13, pr
        rts
        nop
|
||||
|
||||
|
||||
! Quad case: V0 and V1 are visible.
!   A = clip(V1, V2), tagged with the VTX command
!   B = clip(V3, V0), tagged with the EOL command
! Submits: V1, A, V0, B
_Case_0_0_1_1:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V1, REG_CLIP1
        mov     REG_V2, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V3, REG_CLIP1
        mov     REG_V0, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) B.flags

        TransformVertex REG_V1
        TMP_SET_A
        TransformVertex REG_TMP
        TransformVertex REG_V0
        TMP_SET_B
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: V0 and V3 are visible.
!   A = clip(V0, V1), tagged with the VTX command
!   B = clip(V2, V3), tagged with the VTX command
! Submits: A, B, V0, V3 (V3 goes last and keeps its own flags)
_Case_1_0_0_1:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V0, REG_CLIP1
        mov     REG_V1, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V2, REG_CLIP1
        mov     REG_V3, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) B.flags

        TMP_SET_A
        TransformVertex REG_TMP
        TMP_SET_B
        TransformVertex REG_TMP
        TransformVertex REG_V0
        TransformVertex REG_V3

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: V1 and V2 are visible.
!   A = clip(V2, V3), tagged with the EOL command
!   B = clip(V0, V1), tagged with the VTX command
! Submits: V1, V2, B, A
_Case_0_1_1_0:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V2, REG_CLIP1
        mov     REG_V3, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V0, REG_CLIP1
        mov     REG_V1, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) B.flags

        TransformVertex REG_V1
        TransformVertex REG_V2
        TMP_SET_B
        TransformVertex REG_TMP
        TMP_SET_A
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: V2 and V3 are visible.
!   A = clip(V3, V0), tagged with the VTX command
!   B = clip(V1, V2), tagged with the VTX command
! Submits: B, V2, A, V3 (V3 goes last and keeps its own flags)
_Case_1_1_0_0:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V3, REG_CLIP1
        mov     REG_V0, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V1, REG_CLIP1
        mov     REG_V2, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) B.flags

        TMP_SET_B
        TransformVertex REG_TMP
        TransformVertex REG_V2
        TMP_SET_A
        TransformVertex REG_TMP
        TransformVertex REG_V3

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: V0, V1 and V2 are visible.
!   A = clip(V2, V3), tagged with the VTX command
!   B = clip(V3, V0), tagged with the EOL command
! Submits: V1, V2, V0, A, B
_Case_0_1_1_1:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V2, REG_CLIP1
        mov     REG_V3, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V3, REG_CLIP1
        mov     REG_V0, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) B.flags

        TransformVertex REG_V1
        TransformVertex REG_V2
        TransformVertex REG_V0
        TMP_SET_A
        TransformVertex REG_TMP
        TMP_SET_B
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: V0, V1 and V3 are visible.
!   A = clip(V1, V2), tagged with the VTX command
!   B = clip(V2, V3), tagged with the EOL command
! V3 is no longer the final vertex, so its flags word is overwritten with VTX.
! Submits: V0, V1, V3, A, B
_Case_1_0_1_1:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V1, REG_CLIP1
        mov     REG_V2, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V2, REG_CLIP1
        mov     REG_V3, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) B.flags
        mov.l   REG_CMD_VTX, @REG_V3    ! V3.flags = VTX (runs after the call returns)

        TransformVertex REG_V0
        TransformVertex REG_V1
        TransformVertex REG_V3
        TMP_SET_A
        TransformVertex REG_TMP
        TMP_SET_B
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: V0, V2 and V3 are visible.
!   A = clip(V0, V1), tagged with the VTX command
!   B = clip(V1, V2), tagged with the EOL command
! V3 is no longer the final vertex, so its flags word is overwritten with VTX.
! Submits: V3, V0, V2, A, B
_Case_1_1_0_1:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V0, REG_CLIP1
        mov     REG_V1, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V1, REG_CLIP1
        mov     REG_V2, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) B.flags
        mov.l   REG_CMD_VTX, @REG_V3    ! V3.flags = VTX (runs after the call returns)

        TransformVertex REG_V3
        TransformVertex REG_V0
        TransformVertex REG_V2
        TMP_SET_A
        TransformVertex REG_TMP
        TMP_SET_B
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: V1, V2 and V3 are visible.
!   A = clip(V3, V0), tagged with the VTX command
!   B = clip(V0, V1), tagged with the EOL command
! V3 is no longer the final vertex, so its flags word is overwritten with VTX.
! Submits: V2, V3, V1, A, B
_Case_1_1_1_0:
        sts     pr, r13                 ! keep the return address, jsr overwrites pr

        TMP_SET_A
        mov     REG_V3, REG_CLIP1
        mov     REG_V0, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_VTX, @REG_TMP   ! (delay slot) A.flags

        TMP_SET_B
        mov     REG_V0, REG_CLIP1
        mov     REG_V1, REG_CLIP2
        jsr     @REG_CLIPFUNC
        mov.l   REG_CMD_EOL, @REG_TMP   ! (delay slot) B.flags
        mov.l   REG_CMD_VTX, @REG_V3    ! V3.flags = VTX (runs after the call returns)

        TransformVertex REG_V2
        TransformVertex REG_V3
        TransformVertex REG_V1
        TMP_SET_A
        TransformVertex REG_TMP
        TMP_SET_B
        TransformVertex REG_TMP

        lds     r13, pr
        rts
        nop
|
||||
|
||||
! Quad case: all four vertices are visible, no clipping needed.
! Submits V1, V2, V0, V3, i.e. the triangle strip {1,2,0} {2,0,3}.
! Makes no calls, so pr does not need saving.
_Case_1_1_1_1:
        TransformVertex REG_V1
        TransformVertex REG_V2
        TransformVertex REG_V0
        TransformVertex REG_V3
        rts
        nop
|
||||
|
||||
! _ProcessVertexList: walks a list of 32 byte entries and submits them to the
! store queue.
!   r4 = first entry, r5 = number of entries, r6 = store queue destination
! Per entry, based on the top four bits of its flags word:
!   - PVR_CMD_VERTEX:     skipped (it is emitted when its quad ends)
!   - PVR_CMD_VERTEX_EOL: ends a quad; the low byte of the flags selects a
!                         routine from .CASES which clips and submits the quad
!   - anything else:      copied verbatim to the store queue
! 64 bytes of stack are reserved as two scratch vertices for the quad cases.
! NOTE(review): the loop tests the count after the first entry, so a count of
! zero is not handled - presumably callers never pass one; confirm.
.global _ProcessVertexList
.align 4
_ProcessVertexList:
! Save callee-saved registers
        mov.l   r8, @-r15
        mov.l   r9, @-r15
        mov.l   r10, @-r15
        mov.l   r11, @-r15
        mov.l   r12, @-r15
        mov.l   r13, @-r15
        mov.l   r14, @-r15
        sts.l   pr, @-r15
! Register setup
        mov     r4, r14                 ! NEXT = first entry
        mov     r4, r13                 ! CUR  = first entry
        mov.l   .CLIPFUNC, r12          ! r12 = address of _ClipLine
        mov.l   .PVR_EOL, r11           ! r11 = 0xF0000000
        mov.l   .PVR_VTX, r10           ! r10 = 0xE0000000
        mov     r5, r9                  ! r9 = entries left
        mov     r6, r8                  ! r8 = store queue cursor
        bra     SUBMIT_LOOP
        add     #-64, r15               ! (delay slot) reserve the scratch vertices

! Copies the current entry (a command, not a vertex) to the store queue.
! The source must be r13: r4 only equals CUR on the very first iteration and
! afterwards still points at v0 of the most recently drawn quad.
DO_CMD:
        PushVertex r13
        bra     NEXT_ITER
        nop

SUBMIT_LOOP:
        mov.l   @r13, r0                ! FLAGS = CUR->flags
        add     #32, r14                ! NEXT += 32
        mov     r0, r2
        and     r11, r2                 ! TYPE = FLAGS & 0xF0000000 (r11 doubles as the mask)
        cmp/eq  r10, r2                 ! plain vertex?
        bt.s    NEXT_ITER               ! yes: nothing to do yet
        pref    @r14                    ! (delay slot, always runs) prefetch NEXT
        cmp/eq  r11, r2                 ! end-of-quad vertex?
        bf.s    DO_CMD                  ! no: treat the entry as a command
        extu.b  r0, r1                  ! (delay slot) MASK = FLAGS & 0xFF

! End of a quad: CUR is v3 and the three entries before it are v0..v2
        mova    .CASES, r0              ! r0 = address of the case table
        mov     r13, r7                 ! r7 = v3
        shll2   r1                      ! MASK *= 4 (table of 32 bit pointers)
        mov     r13, r6
        mov.l   @(r0,r1), r2            ! r2 = CASES[MASK]
        mov     r13, r5
        add     #-32, r6                ! r6 = v2
        mov     r13, r4
        add     #-64, r5                ! r5 = v1
        jsr     @r2                     ! run the case routine
        add     #-96, r4                ! (delay slot) r4 = v0
NEXT_ITER:
        dt      r9                      ! one entry done, T = (none left)
        bf.s    SUBMIT_LOOP
        mov     r14, r13                ! (delay slot) CUR = NEXT

        add     #64, r15                ! release the scratch vertices
! Restore callee-saved registers
        lds.l   @r15+, pr
        mov.l   @r15+, r14
        mov.l   @r15+, r13
        mov.l   @r15+, r12
        mov.l   @r15+, r11
        mov.l   @r15+, r10
        mov.l   @r15+, r9
        rts
        mov.l   @r15+, r8               ! (delay slot)
.size _ProcessVertexList, .-_ProcessVertexList
.type _ProcessVertexList, %function
|
||||
|
||||
! Literal pool loaded PC-relative by _ProcessVertexList
.align 4
.VP_1:
        .long   _vp
.PVR_VTX:
        .long   0xE0000000
.PVR_EOL:
        .long   0xF0000000
.CLIPFUNC:
        .long   _ClipLine

! Table entry for flag combinations that are not expected; submits nothing
BUGGY_CASE:
        rts
        nop

! Quad routines indexed by the visibility flags of the four vertices.
! Bit 0 = V0 visible, bit 1 = V1, bit 2 = V2, bit 3 = V3, so for example
! CASES[15] = V0_VIS | V1_VIS | V2_VIS | V3_VIS (all four visible).
! Routine names list the bits in V3_V2_V1_V0 order.
.CASES:
        .long   BUGGY_CASE              !  0: nothing visible, should never happen
        .long   _Case_0_0_0_1           !  1
        .long   _Case_0_0_1_0           !  2
        .long   _Case_0_0_1_1           !  3
        .long   _Case_0_1_0_0           !  4
        .long   BUGGY_CASE              !  5: V0_VIS | V2_VIS, should never happen
        .long   _Case_0_1_1_0           !  6
        .long   _Case_0_1_1_1           !  7
        .long   _Case_1_0_0_0           !  8
        .long   _Case_1_0_0_1           !  9
        .long   BUGGY_CASE              ! 10: V1_VIS | V3_VIS, should never happen
        .long   _Case_1_0_1_1           ! 11
        .long   _Case_1_1_0_0           ! 12
        .long   _Case_1_1_0_1           ! 13
        .long   _Case_1_1_1_0           ! 14
        .long   _Case_1_1_1_1           ! 15
|
||||
230
misc/dreamcast/VertexTransform.S
Normal file
230
misc/dreamcast/VertexTransform.S
Normal file
@@ -0,0 +1,230 @@
|
||||
// Register aliases shared by every macro and function in this file.
// r0-r3 are scratch values, r4-r6 carry the three arguments (marked ARG),
// and r7 is a second read pointer that is only used for prefetching.
#define FLG r0 // clip flags
|
||||
#define TMP r1 // temp
|
||||
#define VTX r2 // PVR_CMD_VERTEX
|
||||
#define EOS r3 // PVR_CMD_VERTEX_EOL
|
||||
#define SRC r4 // src pointer ARG
|
||||
#define DST r5 // dst pointer ARG
|
||||
#define CNT r6 // quads count ARG
|
||||
#define PFT r7 // prefetch address
|
||||
|
||||
// Floating point aliases: fr0 is a constant, fr1-fr3 hold the vertex
// attributes, and fr4-fr7 hold the position that gets transformed.
#define ZERO fr0 // 0.0
|
||||
#define F_U fr1 // vertex.u
|
||||
#define F_V fr2 // vertex.v
|
||||
#define F_C fr3 // vertex.colour
|
||||
#define F_X fr4 // vertex.x
|
||||
#define F_Y fr5 // vertex.y
|
||||
#define F_Z fr6 // vertex.z
|
||||
#define F_W fr7 // vertex.w
|
||||
|
||||
#define XYZW fv4 // vertex.xyzw (fv4 is the fr4-fr7 vector, i.e. F_X to F_W)
|
||||
|
||||
|
||||
! =========================================================
|
||||
! ========================= TRANSFORM SETUP ===============
|
||||
! =========================================================
|
||||
! Shared prologue of the quad drawing functions.
! - points the prefetch pointer PFT at the first source vertex
! - biases DST by -32, because each Load*Vertex macro adds 64 to DST and
!   each ProcessVertexN macro then stores 32 bytes downwards from there
! - builds the two command words VTX and EOS and the constant ZERO
! Note: mov #imm sign-extends its 8-bit immediate, so the upper bits are
! set until the two shifts push them out. The final values are exact.
.macro TransformSetup
|
||||
mov SRC, PFT ! MT, pft = src
|
||||
add #-32, DST ! EX, dst -= sizeof(VERTEX)
|
||||
mov #0xE0, VTX ! EX, VTX = 0xFF FF FF E0 (imm8 is sign-extended)
|
||||
pref @PFT ! LS, PREFETCH pft (first vertex)
|
||||
shll16 VTX ! EX, VTX = 0xFF E0 00 00
|
||||
shll8 VTX ! EX, VTX = 0xE0 00 00 00 (PVR_CMD_VERTEX)
|
||||
mov #0xF0, EOS ! EX, EOS = 0xFF FF FF F0 (imm8 is sign-extended)
|
||||
shll16 EOS ! EX, EOS = 0xFF F0 00 00
|
||||
shll8 EOS ! EX, EOS = 0xF0 00 00 00 (PVR_CMD_VERTEX_EOL)
|
||||
fldi0 ZERO ! LS, fr0 = 0.0
|
||||
.endm
|
||||
|
||||
! Shared epilogue of the quad drawing functions.
! Inside the loop DST rests on the start of the last vertex that was kept,
! so adding 32 moves it just past that vertex. That address is returned in r0.
.macro TransformEnd
|
||||
add #32, DST ! EX, DST += sizeof(VERTEX)
|
||||
rts ! CO, return after executing instruction in delay slot
|
||||
mov DST, r0 ! MT, r0 = DST (branch delay slot, sets the return value)
|
||||
.endm
|
||||
|
||||
|
||||
! =========================================================
|
||||
! ========================= VERTEX LOADING ================
|
||||
! =========================================================
|
||||
! Reads one coloured source vertex (x, y, z, colour = 16 bytes) through SRC
! with post-increment, sets W to 1.0 and transforms XYZW by xmtrx.
! Also prefetches the next source vertex and advances DST by 64 so that the
! ProcessVertexN macro which follows can store its 32 bytes downwards.
! F_U and F_V are not written by this macro.
.macro LoadColouredVertex
|
||||
! LOAD XYZ
|
||||
fmov @SRC+, F_X ! LS, X = src->x
|
||||
fmov @SRC+, F_Y ! LS, Y = src->y
|
||||
fmov @SRC+, F_Z ! LS, Z = src->z
|
||||
fldi1 F_W ! LS, W = 1.0
|
||||
! PREPARE NEXT VERTEX
|
||||
add #16, PFT ! EX, pft += VERTEX_STRIDE (4 words per coloured vertex)
|
||||
pref @PFT ! LS, PREFETCH pft (next vertex)
|
||||
add #64, DST ! EX, dst += 2 * sizeof(VERTEX)
|
||||
! TRANSFORM VERTEX
|
||||
ftrv xmtrx, XYZW ! FE, TRANSFORM(XYZW)
|
||||
! LOAD ATTRIBUTES
|
||||
fmov @SRC+, F_C ! LS, C = src->color
|
||||
.endm
|
||||
|
||||
! Reads one textured source vertex (x, y, z, colour, u, v = 24 bytes) through
! SRC with post-increment, sets W to 1.0 and transforms XYZW by xmtrx.
! Also prefetches the next source vertex and advances DST by 64 so that the
! ProcessVertexN macro which follows can store its 32 bytes downwards.
.macro LoadTexturedVertex
|
||||
! LOAD XYZ
|
||||
fmov @SRC+, F_X ! LS, X = src->x
|
||||
fmov @SRC+, F_Y ! LS, Y = src->y
|
||||
fmov @SRC+, F_Z ! LS, Z = src->z
|
||||
fldi1 F_W ! LS, W = 1.0
|
||||
! PREPARE NEXT VERTEX
|
||||
add #24, PFT ! EX, pft += VERTEX_STRIDE (6 words per textured vertex)
|
||||
pref @PFT ! LS, PREFETCH pft (next vertex)
|
||||
add #64, DST ! EX, dst += 2 * sizeof(VERTEX)
|
||||
! TRANSFORM VERTEX
|
||||
ftrv xmtrx, XYZW ! FE, TRANSFORM(XYZW)
|
||||
! LOAD ATTRIBUTES
|
||||
fmov @SRC+, F_C ! LS, C = src->color
|
||||
fmov @SRC+, F_U ! LS, U = src->u
|
||||
fmov @SRC+, F_V ! LS, V = src->v
|
||||
.endm
|
||||
|
||||
! =========================================================
|
||||
! ========================= VERTEX OUTPUT =================
|
||||
! =========================================================
|
||||
! To take advantage of SH4 dual instruction processing,
|
||||
! clipflag calculation and vertex output are interleaved
|
||||
! Writes the first vertex of a quad and starts a fresh clipflags value.
! Eight words are stored with pre-decrement, so in memory they end up in the
! order flags, x, y, w, u, v, c, z and DST is left on the flags word.
! FLG is overwritten (not ORed) with bit 0 = (Z > 0).
.macro ProcessVertex1
|
||||
fmov.s F_Z,@-DST ! LS, dst->z = Z
|
||||
fmov.s F_C,@-DST ! LS, dst->c = C
|
||||
fmov.s F_V,@-DST ! LS, dst->v = V
|
||||
fcmp/gt ZERO, F_Z ! FE, T = Z > 0
|
||||
fmov.s F_U,@-DST ! LS, dst->u = U
|
||||
movt FLG ! EX, CLIPFLAGS = T
|
||||
fmov.s F_W,@-DST ! LS, dst->w = W
|
||||
fmov.s F_Y,@-DST ! LS, dst->y = Y
|
||||
fmov.s F_X,@-DST ! LS, dst->x = X
|
||||
mov.l VTX,@-DST ! LS, dst->flags = PVR_CMD_VERTEX
|
||||
.endm
|
||||
|
||||
! Writes the second vertex of a quad (same 8 word layout as ProcessVertex1)
! and ORs its visibility into FLG as bit 1 = (Z > 0).
! TMP is clobbered.
.macro ProcessVertex2
|
||||
fmov.s F_Z,@-DST ! LS, dst->z = Z
|
||||
fmov.s F_C,@-DST ! LS, dst->c = C
|
||||
fmov.s F_V,@-DST ! LS, dst->v = V
|
||||
fcmp/gt ZERO,F_Z ! FE, T = Z > 0
|
||||
fmov.s F_U,@-DST ! LS, dst->u = U
|
||||
movt TMP ! EX, tmp = T
|
||||
fmov.s F_W,@-DST ! LS, dst->w = W
|
||||
add TMP,TMP ! EX, tmp = tmp + tmp (same as T << 1)
|
||||
fmov.s F_Y,@-DST ! LS, dst->y = Y
|
||||
or TMP,FLG ! EX, CLIPFLAGS |= tmp (T << 1)
|
||||
fmov.s F_X,@-DST ! LS, dst->x = X
|
||||
mov.l VTX,@-DST ! LS, dst->flags = PVR_CMD_VERTEX
|
||||
.endm
|
||||
|
||||
! Writes the third vertex of a quad (same 8 word layout as ProcessVertex1)
! and ORs its visibility into FLG as bit 2 = (Z > 0).
! TMP is clobbered.
.macro ProcessVertex3
|
||||
fmov.s F_Z,@-DST ! LS, dst->z = Z
|
||||
fmov.s F_C,@-DST ! LS, dst->c = C
|
||||
fmov.s F_V,@-DST ! LS, dst->v = V
|
||||
fcmp/gt ZERO, F_Z ! FE, T = Z > 0
|
||||
fmov.s F_U,@-DST ! LS, dst->u = U
|
||||
movt TMP ! EX, tmp = T
|
||||
fmov.s F_W,@-DST ! LS, dst->w = W
|
||||
fmov.s F_Y,@-DST ! LS, dst->y = Y
|
||||
shll2 TMP ! EX, tmp = tmp << 2
|
||||
fmov.s F_X,@-DST ! LS, dst->x = X
|
||||
or TMP,FLG ! EX, CLIPFLAGS |= tmp (T << 2)
|
||||
mov.l VTX,@-DST ! LS, dst->flags = PVR_CMD_VERTEX
|
||||
.endm
|
||||
|
||||
! Writes the fourth and last vertex of a quad (same 8 word layout as
! ProcessVertex1) and ORs its visibility into FLG as bit 3 = (Z > 0).
! Unlike the other three, the flags word stored for this vertex is FLG
! itself, i.e. PVR_CMD_VERTEX_EOL combined with all 4 clipflag bits.
! FLG still contains the EOL bits afterwards, so the drawing functions mask
! it with 15 before testing the clipflags. TMP is clobbered.
.macro ProcessVertex4
|
||||
fmov.s F_Z,@-DST ! LS, dst->z = Z
|
||||
or EOS,FLG ! EX, CLIPFLAGS |= PVR_CMD_VERTEX_EOL
|
||||
fmov.s F_C,@-DST ! LS, dst->c = C
|
||||
fmov.s F_V,@-DST ! LS, dst->v = V
|
||||
fcmp/gt ZERO, F_Z ! FE, T = Z > 0
|
||||
fmov.s F_U,@-DST ! LS, dst->u = U
|
||||
movt TMP ! EX, tmp = T
|
||||
fmov.s F_W,@-DST ! LS, dst->w = W
|
||||
shll2 TMP ! EX, tmp = tmp << 2
|
||||
fmov.s F_Y,@-DST ! LS, dst->y = Y
|
||||
add TMP,TMP ! EX, tmp = (tmp << 2) + (tmp << 2) (T << 3)
|
||||
fmov.s F_X,@-DST ! LS, dst->x = X
|
||||
or TMP,FLG ! EX, CLIPFLAGS |= tmp (T << 3)
|
||||
mov.l FLG,@-DST ! LS, dst->flags = PVR_CMD_VERTEX_EOL | CLIPFLAGS
|
||||
.endm
|
||||
|
||||
|
||||
! =========================================================
|
||||
! ==================== TEXTURED VERTEX TRANSFORM ==========
|
||||
! =========================================================
|
||||
! _DrawTexturedQuads
!   SRC (r4) = textured source vertices, 24 bytes each
!   DST (r5) = output buffer, 32 bytes per vertex
!   CNT (r6) = number of quads
!   returns r0 = address just past the last vertex that was kept
! Each quad is transformed and written as 4 vertices. When none of its 4
! clipflags is set, DST is rewound so the next quad overwrites it.
! NOTE(review): the loop tests CNT only after a quad has been processed, so
! CNT is presumably never 0 on entry - confirm against the callers.
.global _DrawTexturedQuads
|
||||
.align 4
|
||||
|
||||
_DrawTexturedQuads:
|
||||
! Setup
|
||||
TransformSetup
|
||||
|
||||
.T_TRANSFORM_QUAD:
|
||||
LoadTexturedVertex
|
||||
ProcessVertex1
|
||||
|
||||
LoadTexturedVertex
|
||||
ProcessVertex2
|
||||
|
||||
LoadTexturedVertex
|
||||
ProcessVertex3
|
||||
|
||||
LoadTexturedVertex
|
||||
ProcessVertex4
|
||||
|
||||
! CLIPFLAGS TESTING
|
||||
and #15,FLG ! keep only the 4 clipflag bits (drops the EOL bits ORed in by ProcessVertex4)
|
||||
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
||||
bf/s .T_LOOP_END ! if !T goto LOOP_END
|
||||
nop ! (branch delay slot of bf/s)
|
||||
|
||||
! No points visible case
|
||||
add #-128, DST ! DST -= 4 * sizeof(VERTEX), move back to prior quad, so that this invisible quad gets overwritten in next iteration
|
||||
|
||||
.T_LOOP_END:
|
||||
dt CNT ! count--; T = count == 0
|
||||
bf .T_TRANSFORM_QUAD ! if !T then goto T_TRANSFORM_QUAD
|
||||
nop ! bf has no delay slot, so this only runs once the loop exits
|
||||
|
||||
TransformEnd
|
||||
.size _DrawTexturedQuads, .-_DrawTexturedQuads
|
||||
.type _DrawTexturedQuads, %function
|
||||
|
||||
! =========================================================
|
||||
! ==================== COLOURED VERTEX TRANSFORM ==========
|
||||
! =========================================================
|
||||
! _DrawColouredQuads
!   SRC (r4) = coloured source vertices, 16 bytes each
!   DST (r5) = output buffer, 32 bytes per vertex
!   CNT (r6) = number of quads
!   returns r0 = address just past the last vertex that was kept
! Each quad is transformed and written as 4 vertices. When none of its 4
! clipflags is set, DST is rewound so the next quad overwrites it.
! NOTE(review): the loop tests CNT only after a quad has been processed, so
! CNT is presumably never 0 on entry - confirm against the callers.
.global _DrawColouredQuads
|
||||
.align 4
|
||||
|
||||
_DrawColouredQuads:
|
||||
! Setup
|
||||
! U and V are zeroed once up front: LoadColouredVertex never writes them,
! while every ProcessVertexN macro stores them into the output vertex.
fldi0 F_U ! U = 0
|
||||
fldi0 F_V ! V = 0
|
||||
TransformSetup
|
||||
|
||||
.C_TRANSFORM_QUAD:
|
||||
LoadColouredVertex
|
||||
ProcessVertex1
|
||||
|
||||
LoadColouredVertex
|
||||
ProcessVertex2
|
||||
|
||||
LoadColouredVertex
|
||||
ProcessVertex3
|
||||
|
||||
LoadColouredVertex
|
||||
ProcessVertex4
|
||||
|
||||
! CLIPFLAGS TESTING
|
||||
and #15,FLG ! keep only the 4 clipflag bits (drops the EOL bits ORed in by ProcessVertex4)
|
||||
cmp/eq #0,FLG ! T = CLIPFLAGS == 0 (all points invisible)
|
||||
bf/s .C_LOOP_END ! if !T goto C_LOOP_END
|
||||
nop ! (branch delay slot of bf/s)
|
||||
|
||||
! No points visible case
|
||||
add #-128, DST ! dst -= 4 * sizeof(VERTEX), move back to 1 vertex before start of quad
|
||||
|
||||
.C_LOOP_END:
|
||||
dt CNT ! count--; T = count == 0
|
||||
bf .C_TRANSFORM_QUAD ! if !T then goto C_TRANSFORM_QUAD
|
||||
nop ! bf has no delay slot, so this only runs once the loop exits
|
||||
|
||||
TransformEnd
|
||||
.size _DrawColouredQuads, .-_DrawColouredQuads
|
||||
.type _DrawColouredQuads, %function
|
||||
BIN
misc/dreamcast/boot_logo.png
Normal file
BIN
misc/dreamcast/boot_logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.9 KiB |
3
misc/dreamcast/ip.txt
Normal file
3
misc/dreamcast/ip.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
Version : V1.360
|
||||
SW Maker Name : ClassiCube team
|
||||
Game Title : ClassiCube
|
||||
9
misc/dreamcast/readme.txt
Normal file
9
misc/dreamcast/readme.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
The Dreamcast build requires an initial bootstrap program (Initial Program) named IP.bin
|
||||
|
||||
To generate a custom IP.bin, compile https://github.com/Dreamcast-Projects/makeip
|
||||
|
||||
Then run: makeip ip.txt IP.BIN -l boot_logo.png
|
||||
|
||||
---
|
||||
|
||||
For more details about IP.bin, see https://mc.pp.se/dc/ip.bin.html
|
||||
31
misc/dreamcast/sh4_notes.txt
Normal file
31
misc/dreamcast/sh4_notes.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
=========================================================
|
||||
======================== PROCESSOR INFO =================
|
||||
=========================================================
|
||||
The SH4 can dual issue (i.e. parallel execution) two instructions
|
||||
as long as the groups of the two instructions are different:
|
||||
* LS - most ALU and FPU register load/stores
|
||||
* EX - most ALU arithmetic instructions
|
||||
* MT - TST, CMP, NOP, MOV Rm,Rn (NOTE: Can execute in parallel with other MT)
|
||||
* FE - most FPU arithmetic instructions
|
||||
* CO - other instructions (NOTE: Can never execute in parallel)
|
||||
|
||||
The following general aspects of instructions are important to note per the SH4 manual:
|
||||
* Issue rate: Interval between the issue of an instruction and that of the next instruction
|
||||
* Latency: Interval between the issue of an instruction and the generation of its result (completion)
|
||||
* Latency is also the interval between the execution of two instructions with an interdependent relationship.
|
||||
(although different cases may either increase or decrease Latency)
|
||||
|
||||
|
||||
=========================================================
|
||||
======================== REGISTER USAGES ================
|
||||
=========================================================
|
||||
SH4 C ABI:
|
||||
- R0 to R3 are return values (can be overwritten)
|
||||
- R4 to R7 are input arguments (can be overwritten)
|
||||
- R8 to R13 are non-volatile (must be restored at end)
|
||||
- R14 is the frame pointer (must be restored at end)
|
||||
- R15 is the stack pointer (must be restored at end)
|
||||
- FR0 to FR3 are return values (can be overwritten)
|
||||
- FR4 to FR11 are input arguments (can be overwritten)
|
||||
- FR12 to FR15 are non-volatile (must be restored at end)
|
||||
|
||||
Reference in New Issue
Block a user