You might give this a go:

custtrig.h

// I do not guarantee the correctness of this code.

// compile with -O2 or -O3 to get function inlining.

/*
 * alt_sin - sine of x, computed by the x87 FSIN instruction.
 *
 * x is bound to the top of the x87 register stack ("+t"); FSIN overwrites
 * that register in place, so the same variable carries the argument in and
 * the result out.
 *
 * `static inline` is used instead of plain `inline` so that a call which
 * the compiler decides not to inline (for example at -O0) still has a
 * definition to link against when this header is built as ISO C.  `__asm__`
 * is used instead of `asm` because the latter is not a keyword under
 * -std=c99/-std=c11.  Both spellings are also accepted by g++.
 *
 * NOTE(review): x86/x87 only.  The Intel SDM documents FSIN as leaving the
 * operand untouched when |x| >= 2^63 - confirm whether callers can pass
 * arguments that large.
 */
static inline double alt_sin(double x) {
    __asm__ ("fsin" : "+t" (x));
    return x;
}

/*
 * alt_cos - cosine of x, computed by the x87 FCOS instruction.
 *
 * x is bound to the top of the x87 register stack ("+t"); FCOS overwrites
 * that register in place, so the same variable carries the argument in and
 * the result out.
 *
 * `static inline` is used instead of plain `inline` so that a call which
 * the compiler decides not to inline (for example at -O0) still has a
 * definition to link against when this header is built as ISO C.  `__asm__`
 * is used instead of `asm` because the latter is not a keyword under
 * -std=c99/-std=c11.  Both spellings are also accepted by g++.
 *
 * NOTE(review): x86/x87 only.  The Intel SDM documents FCOS as leaving the
 * operand untouched when |x| >= 2^63 - confirm whether callers can pass
 * arguments that large.
 */
static inline double alt_cos(double x) {
    __asm__ ("fcos" : "+t" (x));
    return x;
}

/*
 * alt_tan - tangent of x, computed by the x87 FPTAN instruction.
 *
 * FPTAN replaces st(0) with the tangent and then pushes an extra value
 * (1.0) onto the x87 stack, so the sequence immediately pops that extra
 * value again with FFREEP, leaving the tangent back on top where the "+t"
 * operand expects it.
 *
 * The "st(7)" clobber tells the compiler that one additional stack slot is
 * needed temporarily for that push; the GCC manual describes clobbering a
 * stack register unrelated to the operands as the way to reserve such
 * scratch space.
 *
 * `static inline` is used instead of plain `inline` so that a call which
 * the compiler decides not to inline (for example at -O0) still has a
 * definition to link against when this header is built as ISO C.  `__asm__`
 * is used instead of `asm` because the latter is not a keyword under
 * -std=c99/-std=c11.  Both spellings are also accepted by g++.
 *
 * NOTE(review): x86/x87 only.  The Intel SDM documents FPTAN as leaving the
 * operand untouched (and pushing nothing) when |x| >= 2^63 - confirm
 * whether callers can pass arguments that large.
 */
static inline double alt_tan(double x) {
    __asm__ ("fptan\n\t"          /* st(0) = tan(x), then 1.0 is pushed */
             "ffreep %%st(0)"     /* drop the pushed 1.0 again */
             : "+t" (x)
             :
             : "st(7)");
    return x;
}

/*
 * alt_atan - arctangent of x, computed by the x87 FPATAN instruction.
 *
 * FPATAN computes atan(st(1) / st(0)) and pops once, leaving the result on
 * top of the stack.  Loading 1.0 first (FLD1) turns x into st(1) and makes
 * the divisor 1, so the result is atan(x), delivered in the register the
 * "+t" operand expects.
 *
 * The "st(7)" clobber tells the compiler that one additional stack slot is
 * needed temporarily for the FLD1 push; the GCC manual describes clobbering
 * a stack register unrelated to the operands as the way to reserve such
 * scratch space.
 *
 * `static inline` is used instead of plain `inline` so that a call which
 * the compiler decides not to inline (for example at -O0) still has a
 * definition to link against when this header is built as ISO C.  `__asm__`
 * is used instead of `asm` because the latter is not a keyword under
 * -std=c99/-std=c11.  Both spellings are also accepted by g++.
 *
 * NOTE(review): x86/x87 only.
 */
static inline double alt_atan(double x) {
    __asm__ ("fld1\n\t"           /* st(0) = 1.0, st(1) = x */
             "fpatan"             /* st(0) = atan(x / 1.0) after the pop */
             : "+t" (x)
             :
             : "st(7)");
    return x;
}

// http://gcc.gnu.org/onlinedocs/gcc-4.7.2/gcc/Extended-Asm.html#Extended-Asm

' Benchmark: QB64's built-in SIN/COS/TAN/ATN against the x87 inline-asm
' versions declared in custtrig.h, plus an empty loop as a baseline.
' Each section runs the same 10000 x (0 TO 6.283 STEP .001) sweep and
' prints the elapsed TIMER difference followed by a label.

$CHECKING:OFF

'add:
' -O3 -fno-strict-aliasing -fno-strict-overflow
'to the compiler command line in recompile.bat

DECLARE LIBRARY "custtrig"
    FUNCTION alt_sin# (BYVAL x#)
    FUNCTION alt_cos# (BYVAL x#)
    FUNCTION alt_tan# (BYVAL x#)
    FUNCTION alt_atan# (BYVAL x#)
END DECLARE

' Timer readings are held in DOUBLE: a SINGLE cannot keep millisecond
' resolution once TIMER has grown to tens of thousands of seconds.
DIM s AS DOUBLE, e AS DOUBLE
DIM count AS LONG
DIM count2 AS DOUBLE, dummy AS DOUBLE

' Sanity check: print each replacement next to the built-in it stands in for.
PRINT alt_sin(1), SIN(1)
PRINT alt_cos(1), COS(1)
PRINT alt_tan(1), TAN(1)
PRINT alt_atan(1), ATN(1)
PRINT alt_sin(2), SIN(2)
PRINT alt_cos(2), COS(2)
PRINT alt_tan(2), TAN(2)
PRINT alt_atan(2), ATN(2)

' --- built-in SIN / COS ---
s = TIMER(.001)
FOR count = 1 TO 10000
    FOR count2 = 0 TO 6.283# STEP .001#
        dummy = SIN(count2)
        dummy = COS(count2)
    NEXT
NEXT
e = TIMER(.001) - s
PRINT e, "sin/cos"

' --- built-in TAN / ATN ---
s = TIMER(.001)
FOR count = 1 TO 10000
    FOR count2 = 0 TO 6.283# STEP .001#
        dummy = TAN(count2)
        dummy = ATN(count2)
    NEXT
NEXT
e = TIMER(.001) - s
PRINT e, "tan/atn"

' --- custtrig alt_sin / alt_cos ---
s = TIMER(.001)
FOR count = 1 TO 10000
    FOR count2 = 0 TO 6.283# STEP .001#
        dummy = alt_sin(count2)
        dummy = alt_cos(count2)
    NEXT
NEXT
e = TIMER(.001) - s
PRINT e, "alt_sin/alt_cos"

' --- custtrig alt_tan / alt_atan ---
s = TIMER(.001)
FOR count = 1 TO 10000
    FOR count2 = 0 TO 6.283# STEP .001#
        dummy = alt_tan(count2)
        dummy = alt_atan(count2)
    NEXT
NEXT
e = TIMER(.001) - s
PRINT e, "alt_tan/alt_atan"

' --- baseline: same loops with plain assignments instead of trig calls ---
s = TIMER(.001)
FOR count = 1 TO 10000
    FOR count2 = 0 TO 6.283# STEP .001#
        dummy = 0
        dummy = 0
    NEXT
NEXT
e = TIMER(.001) - s
PRINT e, "empty"

PRINT "press any key": SLEEP: SYSTEM

Please compare it with and without modifying recompile.bat.

Regards,

Michael

P.S. Here's a C++ version, for comparison.

#include <windows.h>

#include <stdio.h>

#include <math.h>

#include "custtrig.h"

// Benchmark driver: times the C library sin/cos and tan/atan against the
// x87 inline-asm versions from custtrig.h, plus an empty loop as baseline.
// Every section runs the same 10000 x [0, 6.283] step 0.001 sweep and prints
// the GetTickCount() difference followed by a label.
//
// The tick difference is a DWORD (unsigned long), so it is printed with
// %lu through an explicit cast rather than %u, which expects unsigned int.
int main () {
    DWORD s, e;
    UINT count;
    double count2;
    volatile double dummy; // volatile so the stores (and the calls feeding them) are kept

    // --- library sin / cos ---
    s = GetTickCount();
    for (count = 1; count <= 10000; ++count) {
        for (count2 = 0; count2 <= 6.283; count2 += 0.001) {
            // reading the argument through a pointer to volatile forces a
            // fresh load, so the call cannot be hoisted or folded away
            dummy = sin(*(volatile double*) &count2);
            dummy = cos(*(volatile double*) &count2);
        }
    }
    e = GetTickCount();
    printf("%lu\t sin/cos\n", (unsigned long) (e - s));

    // --- library tan / atan ---
    s = GetTickCount();
    for (count = 1; count <= 10000; ++count) {
        for (count2 = 0; count2 <= 6.283; count2 += 0.001) {
            dummy = tan(*(volatile double*) &count2);
            dummy = atan(*(volatile double*) &count2);
        }
    }
    e = GetTickCount();
    printf("%lu\t tan/atan\n", (unsigned long) (e - s));

    // --- custtrig alt_sin / alt_cos ---
    s = GetTickCount();
    for (count = 1; count <= 10000; ++count) {
        for (count2 = 0; count2 <= 6.283; count2 += 0.001) {
            dummy = alt_sin(*(volatile double*) &count2);
            dummy = alt_cos(*(volatile double*) &count2);
        }
    }
    e = GetTickCount();
    printf("%lu\t alt_sin/alt_cos\n", (unsigned long) (e - s));

    // --- custtrig alt_tan / alt_atan ---
    s = GetTickCount();
    for (count = 1; count <= 10000; ++count) {
        for (count2 = 0; count2 <= 6.283; count2 += 0.001) {
            dummy = alt_tan(*(volatile double*) &count2);
            dummy = alt_atan(*(volatile double*) &count2);
        }
    }
    e = GetTickCount();
    printf("%lu\t alt_tan/alt_atan\n", (unsigned long) (e - s));

    // --- baseline: same loops with plain stores instead of trig calls ---
    s = GetTickCount();
    for (count = 1; count <= 10000; ++count) {
        for (count2 = 0; count2 <= 6.283; count2 += 0.001) {
            dummy = 0;
            dummy = 0;
        }
    }
    e = GetTickCount();
    printf("%lu\t empty\n", (unsigned long) (e - s));

    return 0;
}

It lacks a lot of the overhead of the QB64 code. However, I had to use volatile to prevent the trig from being optimized away.

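Typical results for me are approximately (in seconds; each group lists, in order: sin/cos, tan/atan, alt_sin/alt_cos, alt_tan/alt_atan, empty loop):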

QB64 normal

12.8

26

11.9

16.1

1.7

QB64 with -O3 -fno-strict-aliasing -fno-strict-overflow

12.4

26

9.5

13.4

1

C++ without -O3

10.3

23.9

10.1

14.1

.4

C++ with -O3

10.5

23.5

7.4

11.1

.2