diff --git a/ee/draw/src/draw.c b/ee/draw/src/draw.c index 07b2d54503da..fba86612abc8 100644 --- a/ee/draw/src/draw.c +++ b/ee/draw/src/draw.c @@ -351,7 +351,11 @@ unsigned char draw_log2(unsigned int x) unsigned char res; +#ifdef _EE __asm__ __volatile__ ("plzcw %0, %1\n\t" : "=r" (res) : "r" (x)); +#else + res = __builtin_clrsb(x); /* non-EE fallback: GCC builtin that counts the redundant leading sign bits of x */ +#endif res = 31 - (res + 1); res += (x > (unsigned int)(1< res) res++; diff --git a/ee/libvux/src/vuhw.c b/ee/libvux/src/vuhw.c index 689243dd1955..c7bb25065c55 100644 --- a/ee/libvux/src/vuhw.c +++ b/ee/libvux/src/vuhw.c @@ -9,6 +9,9 @@ */ #include +#ifndef _EE +#include +#endif void Vu0IdMatrix(VU_MATRIX *m) { @@ -17,6 +20,7 @@ void Vu0IdMatrix(VU_MATRIX *m) void Vu0ResetMatrix(VU_MATRIX *m) { +#ifdef _EE __asm__ __volatile__( #if __GNUC__ > 3 "vmr32.xyzw $vf18, $vf0 \n" @@ -38,6 +42,13 @@ void Vu0ResetMatrix(VU_MATRIX *m) : : "r"(m) ); +#else + memset(m, 0, sizeof(*m)); /* clear every element, then set the four diagonal entries to 1.0f */ + m->m[3][3] = 1.0f; + m->m[2][2] = 1.0f; + m->m[1][1] = 1.0f; + m->m[0][0] = 1.0f; +#endif } #if 0 @@ -61,6 +72,7 @@ void VuxRotMatrix(VU_MATRIX *m, VU_VECTOR *r) void Vu0TransMatrix(VU_MATRIX *m, VU_VECTOR *t) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0(%1) \n" // load 1 qword from 't' to vu's vf1 @@ -72,6 +84,9 @@ void Vu0TransMatrix(VU_MATRIX *m, VU_VECTOR *t) : : "r" (m), "r" (t) ); +#else + *(VU_VECTOR *)(&m->m[3][0]) = *t; /* copies all of *t over the matrix storage starting at m[3][0] */ +#endif } void Vu0TransMatrixXYZ(VU_MATRIX *m,float x, float y, float z) @@ -93,6 +108,7 @@ void Vu0TransMatrixXYZ(VU_MATRIX *m,float x, float y, float z) void Vu0ScaleMatrix(VU_MATRIX *m, VU_VECTOR *s) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0(%1) \n" // load 1 qword from 't' to vu's vf1 @@ -130,6 +146,11 @@ void Vu0ScaleMatrix(VU_MATRIX *m, VU_VECTOR *s) : : "r" (m), "r" (s) ); +#else + m->m[0][0] *= s->x; /* only the first three diagonal entries are scaled; NOTE(review): the EE asm body is elided in this hunk, confirm it has the same effect */ + m->m[1][1] *= s->y; + m->m[2][2] *= s->z; +#endif } @@ -148,6 +169,7 @@ void Vu0ScaleMatrixXYZ(VU_MATRIX *m, float x, float y, float z) void Vu0MulMatrix(VU_MATRIX *m0, VU_MATRIX *m1, 
VU_MATRIX *out) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%0) \n" @@ -210,6 +232,22 @@ void Vu0MulMatrix(VU_MATRIX *m0, VU_MATRIX *m1, VU_MATRIX *out) #endif : : "r" (m0), "r" (m1), "r" (out) ); +#else + memset(out, 0, sizeof(*out)); /* NOTE(review): out is cleared before m0 and m1 are read, so this path assumes out aliases neither input - confirm against callers */ + int i; + for (i = 0; i < 4; i += 1) + { + int j; + for (j = 0; j < 4; j += 1) + { + int k; + for (k = 0; k < 4; k += 1) + { + out->m[i][k] += m1->m[j][k] * m0->m[i][j]; /* accumulates the sum over j of m0[i][j] * m1[j][k] */ + } + } + } +#endif } void Vu0InverseMatrix(VU_MATRIX *in, VU_MATRIX *out) @@ -220,13 +258,7 @@ void Vu0InverseMatrix(VU_MATRIX *in, VU_MATRIX *out) void Vu0ApplyMatrix(VU_MATRIX *m, VU_VECTOR *v0, VU_VECTOR *out) { - /* - out->x = m->m[0][0]*v0->x + m->m[1][0]*v0->y + m->m[2][0]*v0->z + m->m[3][0]*v0->w; - out->y = m->m[0][1]*v0->x + m->m[1][1]*v0->y + m->m[2][1]*v0->z + m->m[3][1]*v0->w; - out->z = m->m[0][2]*v0->x + m->m[1][2]*v0->y + m->m[2][2]*v0->z + m->m[3][2]*v0->w; - out->w = m->m[0][3]*v0->x + m->m[1][3]*v0->y + m->m[2][3]*v0->z + m->m[3][3]*v0->w; - */ - +#ifdef _EE __asm__ __volatile__( #if __GNUC__ > 3 "lqc2 $vf20, 0x00(%1) \n" @@ -253,17 +285,18 @@ void Vu0ApplyMatrix(VU_MATRIX *m, VU_VECTOR *v0, VU_VECTOR *out) #endif : : "r"(m), "r"(v0), "r"(out) ); +#else + out->x = m->m[0][0] * v0->x + m->m[1][0] * v0->y + m->m[2][0] * v0->z + m->m[3][0] * v0->w; /* each output component is the dot product of v0 with one column of m; out is written while v0 is still being read, so out == v0 gives wrong results in this path */ + out->y = m->m[0][1] * v0->x + m->m[1][1] * v0->y + m->m[2][1] * v0->z + m->m[3][1] * v0->w; + out->z = m->m[0][2] * v0->x + m->m[1][2] * v0->y + m->m[2][2] * v0->z + m->m[3][2] * v0->w; + out->w = m->m[0][3] * v0->x + m->m[1][3] * v0->y + m->m[2][3] * v0->z + m->m[3][3] * v0->w; +#endif } void Vu0ApplyRotMatrix(VU_MATRIX *m, VU_VECTOR *v0, VU_VECTOR *out) { - /* - out->x = m->m[0][0]*v0->x + m->m[1][0]*v0->y + m->m[2][0]*v0->z; - out->y = m->m[0][1]*v0->x + m->m[1][1]*v0->y + m->m[2][1]*v0->z; - out->z = m->m[0][2]*v0->x + m->m[1][2]*v0->y + m->m[2][2]*v0->z; - */ - +#ifdef _EE __asm__ __volatile__( #if __GNUC__ > 3 "lqc2 $vf20, 0x00(%1) \n" @@ -288,12 +321,19 @@ void 
Vu0ApplyRotMatrix(VU_MATRIX *m, VU_VECTOR *v0, VU_VECTOR *out) #endif : : "r"(m), "r"(v0), "r"(out) ); +#else + out->x = m->m[0][0] * v0->x + m->m[1][0] * v0->y + m->m[2][0] * v0->z; + out->y = m->m[0][1] * v0->x + m->m[1][1] * v0->y + m->m[2][1] * v0->z; + out->z = m->m[0][2] * v0->x + m->m[1][2] * v0->y + m->m[2][2] * v0->z; + out->w = 1.0f; /* row 3 of m and v0->w are not used in this path; w is forced to 1.0f */ +#endif } void Vu0CopyMatrix(VU_MATRIX *dest, VU_MATRIX *src) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0(%1) \n" // load 1 qword from ee @@ -319,14 +359,16 @@ void Vu0CopyMatrix(VU_MATRIX *dest, VU_MATRIX *src) : : "r" (dest), "r" (src) ); +#else + *dest = *src; /* plain structure assignment */ +#endif } float Vu0DotProduct(VU_VECTOR *v0, VU_VECTOR *v1) { float ret=0; - /* ret = (v0.x*v1.x + v0.y*v1.y + v0.z*v1.z);*/ - +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0(%1) \n" // load 1 qword from ee @@ -351,6 +393,9 @@ float Vu0DotProduct(VU_VECTOR *v0, VU_VECTOR *v1) #endif : "=r" (ret) : "r" (v0), "r" (v1) ); +#else + ret = (v0->x * v1->x + v0->y * v1->y + v0->z * v1->z); /* x, y and z only; the w components are ignored */ +#endif return ret; } diff --git a/ee/math3d/src/math3d.c b/ee/math3d/src/math3d.c index 1e8b66e765ba..e31e1b60b414 100644 --- a/ee/math3d/src/math3d.c +++ b/ee/math3d/src/math3d.c @@ -18,6 +18,7 @@ /* VECTOR FUNCTIONS */ void vector_apply(VECTOR output, VECTOR input0, MATRIX input1) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%2) \n" @@ -45,6 +46,18 @@ : : "r" (output), "r" (input0), "r" (input1) : "memory" ); +#else + int i; + memset(output, 0, sizeof(VECTOR)); /* NOTE(review): output is cleared before input0 is read, so this path assumes output does not alias input0 - confirm against callers */ + for (i = 0; i < 4; i += 1) + { + int j; + for (j = 0; j < 4; j += 1) + { + output[j] += input1[(4 * i) + j] * (i != 3 ? 
input0[i] : 1.0f); /* the fourth input component is taken as 1.0f instead of input0[3] */ + } + } +#endif } void vector_clamp(VECTOR output, VECTOR input0, float min, float max) { @@ -71,6 +84,7 @@ } void vector_copy(VECTOR output, VECTOR input0) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%1) \n" @@ -82,6 +96,9 @@ : : "r" (output), "r" (input0) : "memory" ); +#else + memcpy(output, input0, sizeof(VECTOR)); +#endif } float vector_innerproduct(VECTOR input0, VECTOR input1) { @@ -119,6 +136,7 @@ } void vector_normalize(VECTOR output, VECTOR input0) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%1) \n" @@ -146,9 +164,18 @@ : : "r" (output), "r" (input0) : "memory" ); +#else + float q; + q = 1.0f / sqrtf((input0[0] * input0[0]) + (input0[1] * input0[1]) + (input0[2] * input0[2])); /* reciprocal of the xyz length; a zero-length input makes this a division by zero */ + output[0] = input0[0] * q; + output[1] = input0[1] * q; + output[2] = input0[2] * q; + output[3] = 0.0f; +#endif } void vector_outerproduct(VECTOR output, VECTOR input0, VECTOR input1) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%1) \n" @@ -168,6 +195,12 @@ : : "r" (output), "r" (input0), "r" (input1) : "memory" ); +#else + output[0] = input0[1] * input1[2] - input1[1] * input0[2]; /* cross product of input0 and input1; output is written while the inputs are still being read, so output aliasing an input gives wrong results in this path */ + output[1] = input0[2] * input1[0] - input1[2] * input0[0]; + output[2] = input0[0] * input1[1] - input1[0] * input0[1]; + output[3] = 0.0f; +#endif } void vector_add(VECTOR sum, VECTOR addend, VECTOR summand) { @@ -205,6 +238,7 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { /* MATRIX FUNCTIONS */ void matrix_copy(MATRIX output, MATRIX input0) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%1) \n" @@ -228,6 +262,9 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { : : "r" (output), "r" (input0) : "memory" ); +#else + memcpy(output, input0, sizeof(MATRIX)); +#endif } void matrix_inverse(MATRIX output, MATRIX input0) { @@ -249,6 +286,7 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { } void 
matrix_multiply(MATRIX output, MATRIX input0, MATRIX input1) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%1) \n" @@ -312,6 +350,22 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { : : "r" (output), "r" (input0), "r" (input1) : "memory" ); +#else + int i; + memset(output, 0, sizeof(MATRIX)); /* NOTE(review): output is cleared before input0 and input1 are read, so this path assumes output aliases neither input - confirm against callers */ + for (i = 0; i < 4; i += 1) + { + int j; + for (j = 0; j < 4; j += 1) + { + int k; + for (k = 0; k < 4; k += 1) + { + output[(4 * i) + k] += input1[(4 * j) + k] * input0[(4 * i) + j]; /* accumulate the sum over j; a plain assignment kept only the j == 3 product */ + } + } + } +#endif } void matrix_rotate(MATRIX output, MATRIX input0, VECTOR input1) { @@ -476,6 +530,7 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { /* CALCULATE FUNCTIONS */ void calculate_normals(VECTOR *output, int count, VECTOR *normals, MATRIX local_light) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%3) \n" @@ -519,6 +574,26 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { : "+r" (output), "+r" (count), "+r" (normals) : "r" (local_light) : "memory" ); +#else + int i; + for (i = 0; i < count; i += 1) + { + int j; + memset(output[i], 0, sizeof(output[i])); + for (j = 0; j < 4; j += 1) + { + int k; + for (k = 0; k < 4; k += 1) + { + output[i][k] += local_light[(4 * j) + k] * (j != 3 ? 
normals[i][j] : 1.0f); + } + } + for (j = 0; j < 4; j += 1) + { + output[i][j] *= 1.0f / output[i][3]; + } + } +#endif } void calculate_lights(VECTOR *output, int count, VECTOR *normals, VECTOR *light_direction, VECTOR *light_colour, const int *light_type, int light_count) { @@ -587,6 +662,7 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { } void calculate_vertices(VECTOR *output, int count, VECTOR *vertices, MATRIX local_screen) { +#ifdef _EE __asm__ __volatile__ ( #if __GNUC__ > 3 "lqc2 $vf1, 0x00(%3) \n" @@ -649,4 +725,47 @@ void vector_triangle_normal(VECTOR output, VECTOR a, VECTOR b, VECTOR c) { : "+r" (output), "+r" (count), "+r" (vertices) : "r" (local_screen) : "$10", "memory" ); +#else + int i; + for (i = 0; i < count; i += 1) + { + int j; + int clipped; + memset(output[i], 0, sizeof(output[i])); + for (j = 0; j < 4; j += 1) + { + int k; + for (k = 0; k < 4; k += 1) + { + output[i][k] += local_screen[(4 * j) + k] * (j != 3 ? vertices[i][j] : 1.0f); + } + } + clipped = 0; + for (j = 0; j < 3; j += 1) + { + // Note on differing behavior: vclipw will shift clip flag to the left 6 bits + // However, in this implementation, the leftover high bits are not saved or checked + if ((output[i][j] > fabsf(output[i][3])) || (output[i][j] < -fabsf(output[i][3]))) + { + clipped = 1; + break; + } + } + if (clipped) + { + for (j = 0; j < 3; j += 1) + { + output[i][j] *= 0.0f; + } + output[i][3] = 1.0f; + } + else + { + for (j = 0; j < 3; j += 1) + { + output[i][j] *= 1.0f / output[i][3]; + } + } + } +#endif } diff --git a/ee/mpeg/src/libmpeg.c b/ee/mpeg/src/libmpeg.c index cb4173d9b5bc..1e69f9eb81e4 100644 --- a/ee/mpeg/src/libmpeg.c +++ b/ee/mpeg/src/libmpeg.c @@ -13,6 +13,10 @@ #include #include #include +#ifndef _EE +// for MIN / MAX macros +#include +#endif #include "libmpeg.h" #include "ee_regs.h" @@ -882,6 +886,7 @@ static void _mpeg12_get_ref(_MPEGMacroBlock8 *apMBSrc, int aX, int anY, afAvg <<= 2; +#ifdef _EE __asm__ __volatile__( 
"pnor $v1, $zero, $zero \n" "ld $v0, %4 \n" @@ -893,6 +898,10 @@ static void _mpeg12_get_ref(_MPEGMacroBlock8 *apMBSrc, int aX, int anY, "sll %0, %0, 0 \n" : "=r"(lMBX), "=r"(lMBY) : "r"(lMBX), "r"(lMBY), "m"(s_MPEG12Ctx.m_MBWidth) : "v0", "v1"); +#else + lMBX = MIN(MAX(lMBX, 0), s_MPEG12Ctx.m_MBWidth - 1); /* clamp the macroblock column to the valid range */ + lMBY = MIN(MAX(lMBY, 0), s_MPEG12Ctx.m_MBHeight - 1); /* lMBY is a row index (it is multiplied by m_MBWidth below), so it is bounded by the row count; NOTE(review): assumes the int loaded alongside m_MBWidth by the 64-bit ld in the EE path is m_MBHeight - confirm in the context struct */ +#endif lpMotion->m_pSrc = (unsigned char *)(apMBSrc + lMBX + lMBY * s_MPEG12Ctx.m_MBWidth); lpMotion->m_pDstY = (short *)(s_MPEG12Ctx.m_pCurMotions->m_pSPRRes + (aFDst << 5)); diff --git a/ee/mpeg/src/libmpeg_core.c b/ee/mpeg/src/libmpeg_core.c index 8fe566018508..ff095ab44303 100644 --- a/ee/mpeg/src/libmpeg_core.c +++ b/ee/mpeg/src/libmpeg_core.c @@ -472,12 +472,16 @@ void _MPEG_SetDefQM(int arg0) q = (qword_t *)s_QmIntra; for (i = 0; i < 4; i++) { +#ifdef _EE __asm__ __volatile__( "lq $2, 0(%0) \n" "sq $2, 0(%1) \n" : : "d"(&q[i]), "d"(A_EE_IPU_in_FIFO) : "2"); +#else + *(qword_t *)A_EE_IPU_in_FIFO = q[i]; +#endif } *R_EE_IPU_CMD = IPU_COMMAND_SETIQ; @@ -486,12 +490,16 @@ void _MPEG_SetDefQM(int arg0) q = (qword_t *)s_QmNonIntra; for (i = 0; i < 4; i++) { +#ifdef _EE __asm__ __volatile__( "lq $2, 0(%0) \n" "sq $2, 0(%1) \n" : : "d"(&q[i]), "d"(A_EE_IPU_in_FIFO) : "2"); +#else + *(qword_t *)A_EE_IPU_in_FIFO = q[i]; +#endif } *R_EE_IPU_CMD = IPU_COMMAND_SETIQ | 0x08000000;