diff --git a/README.md b/README.md index 88c3c8a..6f32916 100644 --- a/README.md +++ b/README.md @@ -34,12 +34,16 @@ For the standalone you will at least need freeglut as well as openal-soft, it sh Also you can use this as a core for retroarch by using the Makefile in the libretro folder. There are several defines you can add/remove for different options for both the standalone version and the retroarch core: +AUDIO_LOWERFREQ +Reduces the output frequency of the emulator for more performance COL_32BIT Changes the output texture to be 32bit per pixel (RGBA8) instead of 16bit (RGB565) COL_BGRA Used in combination with COL_32BIT, this will change the output texture to BGRA8 instead of RGBA8 COL_TEX_BSWAP -In 32bit per pixel mode, this will change the byte order of the output texture, in 16bit mode this will output in BGR565 +In 32bit per pixel mode, this will change the byte order of the output texture, in 16bit mode this will output in BGR565 +DO_INLINE_ATTRIBS +Will use optimized function inline attributes for more overall performance These defines are specific to the standalone version: AUDIO_FLOAT diff --git a/apu.c b/apu.c index 8feab1b..2e582d1 100644 --- a/apu.c +++ b/apu.c @@ -188,7 +188,11 @@ void apuInitBufs() //apu.Frequency = nesPAL ? 831187 : 893415; //effective frequencies for Original PPU Video out //apu.Frequency = nesPAL ? 831303 : 894886; +#if AUDIO_LOWERFREQ + apu.Frequency = nesPAL ? 51956 : 55930; +#else apu.Frequency = nesPAL ? 207825 : 223721; +#endif audioExpansion = 0; double dt = 1.0/((double)apu.Frequency); //LP at 22kHz @@ -213,11 +217,11 @@ void apuInitBufs() #if AUDIO_FLOAT apu.BufSizeBytes = apu.BufSize*sizeof(float); apu.OutBuf = (float*)malloc(apu.BufSizeBytes); - printf("Audio: 32-bit Float Output\n"); + printf("Audio: 32-bit Float Output at %iHz\n", apu.Frequency); #else apu.BufSizeBytes = apu.BufSize*sizeof(int16_t); apu.OutBuf = (int16_t*)malloc(apu.BufSizeBytes); - printf("Audio: 16-bit Short Output\n"); + printf("Audio: 16-bit Short Output at %iHz\n", apu.Frequency); #endif /* https://wiki.nesdev.com/w/index.php/APU_Mixer#Lookup_Table */ uint8_t i; @@ -314,7 +318,7 @@ void apuWriteDMCBuf(uint8_t val) extern bool cpu_odd_cycle; -static void apuChangeMode() +FIXNES_NOINLINE static void apuChangeMode() { if(!cpu_odd_cycle) return; @@ -356,57 +360,49 @@ void doEnvelopeLogic(envelope_t *env) //env->envelope = (env->constant ? env->vol : env->decay); } -void sweepUpdateFreq(sweep_t *sw, uint16_t *freq) +static void sweepUpdateFreq(sweep_t *sw, uint16_t *freq) { - uint16_t inFreq = *freq; - if(sw->shift > 0) + //any freq update causes target freq update + sw->targetFreq = *freq; + if(sw->targetFreq >= 8) { if(sw->negative) { - inFreq -= (inFreq >> sw->shift); - if(sw->chan1 == true) inFreq--; + sw->targetFreq -= (sw->targetFreq >> sw->shift); + if(sw->chan1 == true) sw->targetFreq--; } else - inFreq += (inFreq >> sw->shift); - } - if(inFreq > 8 && (inFreq < 0x7FF)) - { - sw->mute = false; - if(sw->enabled && sw->shift) - *freq = inFreq; + sw->targetFreq += (sw->targetFreq >> sw->shift); + if(sw->targetFreq <= 0x7FF) + sw->mute = false; + else //larger than freq register + sw->mute = true; } - else + else //any input < 8 gets muted sw->mute = true; } -void doSweepLogic(sweep_t *sw, uint16_t *freq) +static void doSweepLogic(sweep_t *sw, uint16_t *freq) { - if(sw->start) - { - uint8_t prevDiv = sw->divider; - sw->divider = sw->period; - sw->start = false; - if(prevDiv == 0) - sweepUpdateFreq(sw, freq); - } - else + if(sw->divider == 0) { - if(sw->divider == 0) + if(sw->enabled && sw->shift && !sw->mute) { + *freq = sw->targetFreq; sweepUpdateFreq(sw, freq); - sw->divider = sw->period; } - else - sw->divider--; + sw->divider = sw->period; } - //gets clocked too little on its own? - /*if(inFreq < 8 || (inFreq >= 0x7FF)) - sw->mute = true; else - sw->mute = false;*/ + sw->divider--; + if(sw->start) + { + sw->divider = sw->period; + sw->start = false; + } } -void apuClockA() +FIXNES_NOINLINE static void apuClockA() { if(apu.p1LengthCtr) { @@ -426,7 +422,7 @@ void apuClockA() apu.noiseLengthCtr--; } -void apuClockB() +FIXNES_NOINLINE static void apuClockB() { if(apu.p1LengthCtr) doEnvelopeLogic(&apu.p1Env); @@ -442,19 +438,23 @@ void apuClockB() apu.trireload = false; } -void apuCycle() +FIXNES_ALWAYSINLINE void apuCycle() { uint8_t aExp = audioExpansion; +#if AUDIO_LOWERFREQ + if(!(apu.apuClock&31)) +#else if(!(apu.apuClock&7)) +#endif { if(apu.p1LengthCtr && (apu.reg[0x15] & P1_ENABLE)) { - if(!apu.p1Sweep.mute && apu.freq1 >= 8 && apu.freq1 < 0x7FF) + if(!apu.p1Sweep.mute) apu.p1Out = apu.p1seq[apu.p1Cycle] ? (apu.p1Env.constant ? apu.p1Env.vol : apu.p1Env.decay) : 0; } if(apu.p2LengthCtr && (apu.reg[0x15] & P2_ENABLE)) { - if(!apu.p2Sweep.mute && apu.freq2 >= 8 && apu.freq2 < 0x7FF) + if(!apu.p2Sweep.mute) apu.p2Out = apu.p2seq[apu.p2Cycle] ? (apu.p2Env.constant ? apu.p2Env.vol : apu.p2Env.decay) : 0; } if(apu.triLengthCtr && apu.triCurLinearCtr && (apu.reg[0x15] & TRI_ENABLE)) @@ -713,8 +713,6 @@ void apuSetReg00(uint16_t addr, uint8_t val) apu.p1seq = pulseSeqs[val>>6]; apu.p1Env.constant = ((val&PULSE_CONST_V) != 0); apu.p1Env.loop = apu.p1haltloop = ((val&PULSE_HALT_LOOP) != 0); - if(apu.freq1 > 8 && (apu.freq1 < 0x7FF)) - apu.p1Sweep.mute = false; //to be safe } void apuSetReg01(uint16_t addr, uint8_t val) { @@ -726,9 +724,8 @@ void apuSetReg01(uint16_t addr, uint8_t val) apu.p1Sweep.period = (val>>4)&7; apu.p1Sweep.negative = ((val&0x8) != 0); apu.p1Sweep.start = true; - if(apu.freq1 > 8 && (apu.freq1 < 0x7FF)) - apu.p1Sweep.mute = false; //to be safe - doSweepLogic(&apu.p1Sweep, &apu.freq1); + //adjust for new sweep regs + sweepUpdateFreq(&apu.p1Sweep, &apu.freq1); } void apuSetReg02(uint16_t addr, uint8_t val) { @@ -736,8 +733,7 @@ void apuSetReg02(uint16_t addr, uint8_t val) apu.reg[2] = val; //printf("P1 time low %02x\n", val); apu.freq1 = ((apu.freq1&~0xFF) | val); - if(apu.freq1 > 8 && (apu.freq1 < 0x7FF)) - apu.p1Sweep.mute = false; //to be safe + sweepUpdateFreq(&apu.p1Sweep, &apu.freq1); } void apuSetReg03(uint16_t addr, uint8_t val) { @@ -747,8 +743,7 @@ void apuSetReg03(uint16_t addr, uint8_t val) if(apu.reg[0x15] & P1_ENABLE) apu.p1LengthCtr = apu.lengthLookupTbl[val>>3]; apu.freq1 = (apu.freq1&0xFF) | ((val&7)<<8); - if(apu.freq1 > 8 && (apu.freq1 < 0x7FF)) - apu.p1Sweep.mute = false; //to be safe + sweepUpdateFreq(&apu.p1Sweep, &apu.freq1); //printf("P1 new freq %04x\n", apu.freq1); apu.p1Env.start = true; } @@ -760,8 +755,6 @@ void apuSetReg04(uint16_t addr, uint8_t val) apu.p2seq = pulseSeqs[val>>6]; apu.p2Env.constant = ((val&PULSE_CONST_V) != 0); apu.p2Env.loop = apu.p2haltloop = ((val&PULSE_HALT_LOOP) != 0); - if(apu.freq2 > 8 && (apu.freq2 < 0x7FF)) - apu.p2Sweep.mute = false; //to be safe } void apuSetReg05(uint16_t addr, uint8_t val) { @@ -773,9 +766,8 @@ void apuSetReg05(uint16_t addr, uint8_t val) apu.p2Sweep.period = (val>>4)&7; apu.p2Sweep.negative = ((val&0x8) != 0); apu.p2Sweep.start = true; - if(apu.freq2 > 8 && (apu.freq2 < 0x7FF)) - apu.p2Sweep.mute = false; //to be safe - doSweepLogic(&apu.p2Sweep, &apu.freq2); + //adjust for new sweep regs + sweepUpdateFreq(&apu.p2Sweep, &apu.freq2); } void apuSetReg06(uint16_t addr, uint8_t val) { @@ -783,8 +775,7 @@ void apuSetReg06(uint16_t addr, uint8_t val) apu.reg[6] = val; //printf("P2 time low %02x\n", val); apu.freq2 = ((apu.freq2&~0xFF) | val); - if(apu.freq2 > 8 && (apu.freq2 < 0x7FF)) - apu.p2Sweep.mute = false; //to be safe + sweepUpdateFreq(&apu.p2Sweep, &apu.freq2); } void apuSetReg07(uint16_t addr, uint8_t val) { @@ -794,8 +785,7 @@ void apuSetReg07(uint16_t addr, uint8_t val) if(apu.reg[0x15] & P2_ENABLE) apu.p2LengthCtr = apu.lengthLookupTbl[val>>3]; apu.freq2 = (apu.freq2&0xFF) | ((val&7)<<8); - if(apu.freq2 > 8 && (apu.freq2 < 0x7FF)) - apu.p2Sweep.mute = false; //to be safe + sweepUpdateFreq(&apu.p2Sweep, &apu.freq2); //printf("P2 new freq %04x\n", apu.freq2); apu.p2Env.start = true; } diff --git a/apu.h b/apu.h index c5cc455..0953c23 100644 --- a/apu.h +++ b/apu.h @@ -8,6 +8,8 @@ #ifndef _apu_h_ #define _apu_h_ +#include "common.h" + #define NUM_BUFFERS 4 #define EXP_VRC6 (1<<0) @@ -20,7 +22,7 @@ void apuInitBufs(); void apuDeinitBufs(); void apuInit(); -void apuCycle(); +FIXNES_ALWAYSINLINE void apuCycle(); void apuWriteDMCBuf(uint8_t val); uint8_t *apuGetBuf(); uint32_t apuGetBufSize(); @@ -69,6 +71,9 @@ typedef struct _sweep_t { uint8_t period; uint8_t divider; uint8_t shift; + //specifically signed for pulse 1 as a shift value of 0 and negative + //flag enabled would cause an overflow and a mute otherwise + int16_t targetFreq; } sweep_t; void doEnvelopeLogic(envelope_t *env); diff --git a/build_linux.sh b/build_linux.sh index 41299e3..bc8d9d3 100755 --- a/build_linux.sh +++ b/build_linux.sh @@ -2,6 +2,6 @@ #Need to replace this with a makefile -gcc -DZIPSUPPORT main.c apu.c audio.c audio_fds.c audio_mmc5.c audio_vrc6.c audio_vrc7.c audio_n163.c audio_s5b.c alhelpers.c cpu.c ppu.c mem.c input.c mapper.c mapperList.c fm2play.c vrc_irq.c mapper/*.c unzip/*.c -DFREEGLUT_STATIC -lglut -lopenal -lGL -lGLU -lm -Wall -lz -Wextra -O3 -flto -s -o fixNES +gcc -DZIPSUPPORT main.c apu.c audio.c audio_fds.c audio_mmc5.c audio_vrc6.c audio_vrc7.c audio_n163.c audio_s5b.c alhelpers.c cpu.c ppu.c mem.c input.c mapper.c mapperList.c fm2play.c vrc_irq.c mapper/*.c unzip/*.c -DFREEGLUT_STATIC -lglut -lopenal -lGL -lGLU -lm -Wall -lz -Wextra -DDO_INLINE_ATTRIBS=1 -Wno-attributes -O3 -flto -s -o fixNES echo "Succesfully built fixNES" diff --git a/build_msys32.sh b/build_msys32.sh index f4c70e5..62e36d2 100644 --- a/build_msys32.sh +++ b/build_msys32.sh @@ -1,2 +1,2 @@ #!/bin/sh -gcc -DWINDOWS_BUILD -DZIPSUPPORT main.c apu.c audio.c audio_fds.c audio_mmc5.c audio_vrc6.c audio_vrc7.c audio_n163.c audio_s5b.c alhelpers.c cpu.c ppu.c mem.c input.c mapper.c mapperList.c fm2play.c vrc_irq.c mapper/*.c unzip/*.c -DFREEGLUT_STATIC -lfreeglut_static -lopenal32 -lopengl32 -lglu32 -lgdi32 -lwinmm -lz -Wall -Wextra -O3 -flto -s -o fixNES \ No newline at end of file +gcc -DWINDOWS_BUILD -DZIPSUPPORT main.c apu.c audio.c audio_fds.c audio_mmc5.c audio_vrc6.c audio_vrc7.c audio_n163.c audio_s5b.c alhelpers.c cpu.c ppu.c mem.c input.c mapper.c mapperList.c fm2play.c vrc_irq.c mapper/*.c unzip/*.c -DFREEGLUT_STATIC -lfreeglut_static -lopenal32 -lopengl32 -lglu32 -lgdi32 -lwinmm -lz -Wall -Wextra -DDO_INLINE_ATTRIBS=1 -Wno-attributes -O3 -flto -s -o fixNES \ No newline at end of file diff --git a/build_windows_console.bat b/build_windows_console.bat deleted file mode 100644 index 25de828..0000000 --- a/build_windows_console.bat +++ /dev/null @@ -1,2 +0,0 @@ -gcc -DWINDOWS_BUILD -DZIPSUPPORT main.c apu.c audio.c audio_fds.c audio_mmc5.c audio_vrc6.c audio_vrc7.c audio_n163.c audio_s5b.c alhelpers.c cpu.c ppu.c mem.c input.c mapper.c mapperList.c fm2play.c vrc_irq.c mapper/*.c unzip/*.c -DFREEGLUT_STATIC -lfreeglut_static -lopenal32 -lopengl32 -lglu32 -lgdi32 -lwinmm -lz -Wall -Wextra -O3 -flto -s -o fixNES -pause \ No newline at end of file diff --git a/build_windows_noconsole.bat b/build_windows_noconsole.bat deleted file mode 100644 index e81209c..0000000 --- a/build_windows_noconsole.bat +++ /dev/null @@ -1,2 +0,0 @@ -gcc -DWINDOWS_BUILD -DZIPSUPPORT main.c apu.c audio.c audio_fds.c audio_mmc5.c audio_vrc6.c audio_vrc7.c audio_n163.c audio_s5b.c alhelpers.c cpu.c ppu.c mem.c input.c mapper.c mapperList.c fm2play.c vrc_irq.c mapper/*.c unzip/*.c -DFREEGLUT_STATIC -lfreeglut_static -lopenal32 -lopengl32 -lglu32 -lgdi32 -lwinmm -lz -Wall -Wextra -O3 -flto -s -o fixNES -Wl,--subsystem,windows -pause diff --git a/common.h b/common.h index fd08cf6..c2548e7 100644 --- a/common.h +++ b/common.h @@ -1,10 +1,17 @@ #ifndef _common_h_ #define _common_h_ +#if DO_INLINE_ATTRIBS #ifdef _MSC_VER #define FIXNES_NOINLINE __declspec(noinline) +#define FIXNES_ALWAYSINLINE __forceinline #else #define FIXNES_NOINLINE __attribute__((noinline)) +#define FIXNES_ALWAYSINLINE __attribute__((always_inline)) +#endif +#else +#define FIXNES_NOINLINE +#define FIXNES_ALWAYSINLINE #endif #endif diff --git a/cpu.c b/cpu.c index 84407c7..86df8f8 100644 --- a/cpu.c +++ b/cpu.c @@ -25,6 +25,9 @@ #define P_FLAG_OVERFLOW (1<<6) #define P_FLAG_NEGATIVE (1<<7) +#define CPU_OAM_DMA (1<<0) +#define CPU_DMC_DMA (1<<1) + extern bool nesPause; //used externally @@ -41,12 +44,11 @@ static struct { uint8_t instr; uint8_t irq; uint8_t irqMask; + uint8_t dma; bool boot; bool reset; bool needsIndFix; bool allow_update_irq; - bool oam_dma; - bool dmc_dma; bool currently_dma; bool oam_ready; bool dmc_dma_dummyread; @@ -84,8 +86,7 @@ void cpuInit() cpu.instr = 0; cpu.irq = 0; cpu.irqMask = 0; - cpu.oam_dma = false; - cpu.dmc_dma = false; + cpu.dma = 0; cpu.currently_dma = false; cpu.oam_ready = false; cpu.dmc_dma_dummyread = false; @@ -108,7 +109,7 @@ void cpuInit() nsf_endPlayback = false; } -static void setRegStats(uint8_t reg) +FIXNES_ALWAYSINLINE inline static void setRegStats(uint8_t reg) { if(reg == 0) { @@ -125,7 +126,7 @@ static void setRegStats(uint8_t reg) } } -static void cpuSetARRRegs() +FIXNES_ALWAYSINLINE inline static void cpuSetARRRegs() { if((cpu.a & ((1<<5) | (1<<6))) == ((1<<5) | (1<<6))) { @@ -151,25 +152,25 @@ static void cpuSetARRRegs() /* Helper functions for updating reg sets */ -static inline void cpuSetA(uint8_t val) +FIXNES_ALWAYSINLINE inline static void cpuSetA(uint8_t val) { cpu.a = val; setRegStats(cpu.a); } -static inline void cpuSetX(uint8_t val) +FIXNES_ALWAYSINLINE inline static void cpuSetX(uint8_t val) { cpu.x = val; setRegStats(cpu.x); } -static inline void cpuSetY(uint8_t val) +FIXNES_ALWAYSINLINE inline static void cpuSetY(uint8_t val) { cpu.y = val; setRegStats(cpu.y); } -static inline uint8_t cpuSetTMP(uint8_t val) +FIXNES_ALWAYSINLINE inline static uint8_t cpuSetTMP(uint8_t val) { setRegStats(val); return val; @@ -177,25 +178,25 @@ static inline uint8_t cpuSetTMP(uint8_t val) /* Various Instructions used multiple times */ -static inline void cpuAND() +FIXNES_ALWAYSINLINE inline static void cpuAND() { cpu.a &= cpu.tmp; setRegStats(cpu.a); } -static inline void cpuORA() +FIXNES_ALWAYSINLINE inline static void cpuORA() { cpu.a |= cpu.tmp; setRegStats(cpu.a); } -static inline void cpuEOR() +FIXNES_ALWAYSINLINE inline static void cpuEOR() { cpu.a ^= cpu.tmp; setRegStats(cpu.a); } -static uint8_t cpuASL(uint8_t val) +FIXNES_ALWAYSINLINE inline static uint8_t cpuASL(uint8_t val) { if(val & (1<<7)) cpu.p |= P_FLAG_CARRY; @@ -206,10 +207,10 @@ static uint8_t cpuASL(uint8_t val) return val; } -static void cpuASLa() { cpuSetA(cpuASL(cpu.a)); }; -static void cpuASLt() { cpu.tmp = cpuASL(cpu.tmp); }; +FIXNES_ALWAYSINLINE inline static void cpuASLa() { cpuSetA(cpuASL(cpu.a)); }; +FIXNES_ALWAYSINLINE inline static void cpuASLt() { cpu.tmp = cpuASL(cpu.tmp); }; -static uint8_t cpuLSR(uint8_t val) +FIXNES_ALWAYSINLINE inline static uint8_t cpuLSR(uint8_t val) { if(val & (1<<0)) cpu.p |= P_FLAG_CARRY; @@ -220,10 +221,10 @@ static uint8_t cpuLSR(uint8_t val) return val; } -static void cpuLSRa() { cpuSetA(cpuLSR(cpu.a)); }; -static void cpuLSRt() { cpu.tmp = cpuLSR(cpu.tmp); }; +FIXNES_ALWAYSINLINE inline static void cpuLSRa() { cpuSetA(cpuLSR(cpu.a)); }; +FIXNES_ALWAYSINLINE inline static void cpuLSRt() { cpu.tmp = cpuLSR(cpu.tmp); }; -static uint8_t cpuROL(uint8_t val) +FIXNES_ALWAYSINLINE inline static uint8_t cpuROL(uint8_t val) { uint8_t oldP = cpu.p; if(val & (1<<7)) @@ -237,10 +238,10 @@ static uint8_t cpuROL(uint8_t val) return val; } -static void cpuROLa() { cpuSetA(cpuROL(cpu.a)); }; -static void cpuROLt() { cpu.tmp = cpuROL(cpu.tmp); }; +FIXNES_ALWAYSINLINE inline static void cpuROLa() { cpuSetA(cpuROL(cpu.a)); }; +FIXNES_ALWAYSINLINE inline static void cpuROLt() { cpu.tmp = cpuROL(cpu.tmp); }; -static uint8_t cpuROR(uint8_t val) +FIXNES_ALWAYSINLINE inline static uint8_t cpuROR(uint8_t val) { uint8_t oldP = cpu.p; if(val & (1<<0)) @@ -254,16 +255,16 @@ static uint8_t cpuROR(uint8_t val) return val; } -static void cpuRORa() { cpuSetA(cpuROR(cpu.a)); }; -static void cpuRORt() { cpu.tmp = cpuROR(cpu.tmp); }; +FIXNES_ALWAYSINLINE inline static void cpuRORa() { cpuSetA(cpuROR(cpu.a)); }; +FIXNES_ALWAYSINLINE inline static void cpuRORt() { cpu.tmp = cpuROR(cpu.tmp); }; -static void cpuKIL() +FIXNES_ALWAYSINLINE inline static void cpuKIL() { printf("Processor Requested Lock-Up at %04x\n", cpu.pc-1); nesPause = true; } -static void cpuADCv(uint8_t in) +FIXNES_ALWAYSINLINE inline static void cpuADCv(uint8_t in) { //use uint16_t here to easly detect carry uint16_t res = cpu.a + in; @@ -286,14 +287,14 @@ static void cpuADCv(uint8_t in) cpuSetA(res); } -static void cpuADC() { cpuADCv(cpu.tmp); } +FIXNES_ALWAYSINLINE inline static void cpuADC() { cpuADCv(cpu.tmp); } -static void cpuSBC() { cpuADCv(~cpu.tmp); } +FIXNES_ALWAYSINLINE inline static void cpuSBC() { cpuADCv(~cpu.tmp); } -static void cpuISC() { cpu.tmp = cpuSetTMP(cpu.tmp+1); cpuSBC(); } +FIXNES_ALWAYSINLINE inline static void cpuISC() { cpu.tmp = cpuSetTMP(cpu.tmp+1); cpuSBC(); } -static uint8_t cpuCMP(uint8_t reg) +FIXNES_ALWAYSINLINE inline static uint8_t cpuCMP(uint8_t reg) { if(reg >= cpu.tmp) cpu.p |= P_FLAG_CARRY; @@ -305,13 +306,13 @@ static uint8_t cpuCMP(uint8_t reg) return cmpVal; } -static void cpuCMPa() { cpuCMP(cpu.a); } -static void cpuCMPx() { cpuCMP(cpu.x); } -static void cpuCMPy() { cpuCMP(cpu.y); } -static void cpuCMPax() { cpu.x = cpuCMP(cpu.a&cpu.x); } -static void cpuDCP() { cpu.tmp = cpuSetTMP(cpu.tmp-1); cpuCMPa(); } +FIXNES_ALWAYSINLINE inline static void cpuCMPa() { cpuCMP(cpu.a); } +FIXNES_ALWAYSINLINE inline static void cpuCMPx() { cpuCMP(cpu.x); } +FIXNES_ALWAYSINLINE inline static void cpuCMPy() { cpuCMP(cpu.y); } +FIXNES_ALWAYSINLINE inline static void cpuCMPax() { cpu.x = cpuCMP(cpu.a&cpu.x); } +FIXNES_ALWAYSINLINE inline static void cpuDCP() { cpu.tmp = cpuSetTMP(cpu.tmp-1); cpuCMPa(); } -static void cpuBIT() +FIXNES_ALWAYSINLINE inline static void cpuBIT() { if((cpu.a & cpu.tmp) == 0) cpu.p |= P_FLAG_ZERO; @@ -329,20 +330,20 @@ static void cpuBIT() cpu.p &= ~P_FLAG_NEGATIVE; } -static void cpuSLO() { cpuASLt(); cpuORA(); } -static void cpuRLA() { cpuROLt(); cpuAND(); } -static void cpuSRE() { cpuLSRt(); cpuEOR(); } -static void cpuRRA() { cpuRORt(); cpuADC(); } -static void cpuASR() { cpuAND(); cpuLSRa(); } +FIXNES_ALWAYSINLINE inline static void cpuSLO() { cpuASLt(); cpuORA(); } +FIXNES_ALWAYSINLINE inline static void cpuRLA() { cpuROLt(); cpuAND(); } +FIXNES_ALWAYSINLINE inline static void cpuSRE() { cpuLSRt(); cpuEOR(); } +FIXNES_ALWAYSINLINE inline static void cpuRRA() { cpuRORt(); cpuADC(); } +FIXNES_ALWAYSINLINE inline static void cpuASR() { cpuAND(); cpuLSRa(); } -static void cpuARR() +FIXNES_ALWAYSINLINE inline static void cpuARR() { cpuAND(); cpuRORa(); cpuSetARRRegs(); } -static void cpuAAC() +FIXNES_ALWAYSINLINE inline static void cpuAAC() { cpuAND(); if(cpu.p & P_FLAG_NEGATIVE) @@ -351,40 +352,40 @@ static void cpuAAC() cpu.p &= ~P_FLAG_CARRY; } -static void cpuCLC() { cpu.p &= ~P_FLAG_CARRY; } -static void cpuSEC() { cpu.p |= P_FLAG_CARRY; } +FIXNES_ALWAYSINLINE inline static void cpuCLC() { cpu.p &= ~P_FLAG_CARRY; } +FIXNES_ALWAYSINLINE inline static void cpuSEC() { cpu.p |= P_FLAG_CARRY; } -static void cpuCLI() { cpu.p_irq_req = 2; } -static void cpuSEI() { cpu.p_irq_req = 1; } +FIXNES_ALWAYSINLINE inline static void cpuCLI() { cpu.p_irq_req = 2; } +FIXNES_ALWAYSINLINE inline static void cpuSEI() { cpu.p_irq_req = 1; } -static void cpuCLV() { cpu.p &= ~P_FLAG_OVERFLOW; } +FIXNES_ALWAYSINLINE inline static void cpuCLV() { cpu.p &= ~P_FLAG_OVERFLOW; } -static void cpuCLD() { cpu.p &= ~P_FLAG_DECIMAL; } -static void cpuSED() { cpu.p |= P_FLAG_DECIMAL; } +FIXNES_ALWAYSINLINE inline static void cpuCLD() { cpu.p &= ~P_FLAG_DECIMAL; } +FIXNES_ALWAYSINLINE inline static void cpuSED() { cpu.p |= P_FLAG_DECIMAL; } -static void cpuINX() { cpuSetX(cpu.x+1); } -static void cpuINY() { cpuSetY(cpu.y+1); } -static void cpuINCt() { cpu.tmp = cpuSetTMP(cpu.tmp+1); } +FIXNES_ALWAYSINLINE inline static void cpuINX() { cpuSetX(cpu.x+1); } +FIXNES_ALWAYSINLINE inline static void cpuINY() { cpuSetY(cpu.y+1); } +FIXNES_ALWAYSINLINE inline static void cpuINCt() { cpu.tmp = cpuSetTMP(cpu.tmp+1); } -static void cpuDEX() { cpuSetX(cpu.x-1); } -static void cpuDEY() { cpuSetY(cpu.y-1); } -static void cpuDECt() { cpu.tmp = cpuSetTMP(cpu.tmp-1); } +FIXNES_ALWAYSINLINE inline static void cpuDEX() { cpuSetX(cpu.x-1); } +FIXNES_ALWAYSINLINE inline static void cpuDEY() { cpuSetY(cpu.y-1); } +FIXNES_ALWAYSINLINE inline static void cpuDECt() { cpu.tmp = cpuSetTMP(cpu.tmp-1); } -static void cpuTXA() { cpuSetA(cpu.x); } -static void cpuTYA() { cpuSetA(cpu.y); } -static void cpuTSX() { cpuSetX(cpu.s); } -static void cpuTXS() { cpu.s = cpu.x; } +FIXNES_ALWAYSINLINE inline static void cpuTXA() { cpuSetA(cpu.x); } +FIXNES_ALWAYSINLINE inline static void cpuTYA() { cpuSetA(cpu.y); } +FIXNES_ALWAYSINLINE inline static void cpuTSX() { cpuSetX(cpu.s); } +FIXNES_ALWAYSINLINE inline static void cpuTXS() { cpu.s = cpu.x; } -static void cpuTAY() { cpuSetY(cpu.a); } -static void cpuTAX() { cpuSetX(cpu.a); } +FIXNES_ALWAYSINLINE inline static void cpuTAY() { cpuSetY(cpu.a); } +FIXNES_ALWAYSINLINE inline static void cpuTAX() { cpuSetX(cpu.a); } -static void cpuLDA() { cpuSetA(cpu.tmp); } -static void cpuLDX() { cpuSetX(cpu.tmp); } -static void cpuLDY() { cpuSetY(cpu.tmp); } -static void cpuXAA() { cpuSetA(cpu.x&cpu.tmp); } -static void cpuAXT() { cpuSetA(cpu.tmp); cpuSetX(cpu.a); } -static void cpuLAX() { cpuSetA(cpu.tmp); cpuSetX(cpu.tmp); } -static void cpuLAR() { cpuSetA(cpu.tmp); cpuAND(cpu.s); cpuSetX(cpu.a); cpu.s = cpu.a; } +FIXNES_ALWAYSINLINE inline static void cpuLDA() { cpuSetA(cpu.tmp); } +FIXNES_ALWAYSINLINE inline static void cpuLDX() { cpuSetX(cpu.tmp); } +FIXNES_ALWAYSINLINE inline static void cpuLDY() { cpuSetY(cpu.tmp); } +FIXNES_ALWAYSINLINE inline static void cpuXAA() { cpuSetA(cpu.x&cpu.tmp); } +FIXNES_ALWAYSINLINE inline static void cpuAXT() { cpuSetA(cpu.tmp); cpuSetX(cpu.a); } +FIXNES_ALWAYSINLINE inline static void cpuLAX() { cpuSetA(cpu.tmp); cpuSetX(cpu.tmp); } +FIXNES_ALWAYSINLINE inline static void cpuLAR() { cpu.a = cpu.s; cpuAND(); cpuSetX(cpu.a); cpu.s = cpu.a; } /* For Interrupt Handling */ @@ -541,7 +542,7 @@ static const uint8_t cpu_mem_type[CPU_STATE_END] = { static const uint8_t cpu_start_arr[1] = { CPU_GET_INSTRUCTION }; -static void cpuSetStartArray() +FIXNES_ALWAYSINLINE inline static void cpuSetStartArray() { cpu.action_arr = cpu_start_arr; cpu.arr_pos = 0; @@ -604,20 +605,20 @@ static const uint8_t cpu_bmi_arr[4] = { CPU_TMP_READ8_PC_INC_CHECK_BMI, CPU_BRAN static const uint8_t cpu_bvc_arr[4] = { CPU_TMP_READ8_PC_INC_CHECK_BVC, CPU_BRANCH_SETUP, CPU_NULL_READ8_PC_CHK, CPU_GET_INSTRUCTION }; static const uint8_t cpu_bvs_arr[4] = { CPU_TMP_READ8_PC_INC_CHECK_BVS, CPU_BRANCH_SETUP, CPU_NULL_READ8_PC_CHK, CPU_GET_INSTRUCTION }; -static void cpuCheckIrq() +FIXNES_ALWAYSINLINE inline static void cpuCheckIrq() { cpu.irq |= (interrupt & cpu.irqMask) | ppuNMI(); } /* useful for the branch checks */ -static void cpuBranchCheck(bool takeBranch) +FIXNES_ALWAYSINLINE inline static void cpuBranchCheck(bool takeBranch) { cpuCheckIrq(); if(!takeBranch) //get next instruction cpuSetStartArray(); } -static void cpuBranchSetup() +FIXNES_ALWAYSINLINE inline static void cpuBranchSetup() { cpu.indVal = cpu.pc + (int8_t)cpu.tmp; //only need extra cycle if it needs fixup @@ -812,7 +813,7 @@ static bool cpuHandleIrqUpdates() return false; } -static void cpuSetAddrIndFix() +FIXNES_ALWAYSINLINE inline static void cpuSetAddrIndFix() { //first read will be at wrong pos //so let cpu know it needs fixup @@ -823,7 +824,7 @@ static void cpuSetAddrIndFix() } } -static bool cpuDoAddrIndFix() +FIXNES_ALWAYSINLINE inline static bool cpuDoAddrIndFix() { if(cpu.needsIndFix) { @@ -897,16 +898,16 @@ static bool cpuTryTakeover() return false; } -static void cpuDoDMA() +FIXNES_NOINLINE static void cpuDoDMA() { - if(cpu.dmc_dma) + if(cpu.dma & CPU_DMC_DMA) { if(cpu.currently_dma && cpu.dmc_halted) { if(cpu.dmc_dma_dummyread) //1st read always dummy read { cpu.dmc_dma_dummyread = false; - if(!cpu.oam_dma || cpu.oam_dma_pause) + if(!(cpu.dma&CPU_OAM_DMA) || cpu.oam_dma_pause) { cpuDMATryHalt(); return; @@ -916,7 +917,7 @@ static void cpuDoDMA() { uint8_t dmc_dma_val = memGet8(cpu.dmc_dma_addr); apuWriteDMCBuf(dmc_dma_val); - cpu.dmc_dma = false; + cpu.dma &= ~CPU_DMC_DMA; if(!cpu.oam_halted) //done with DMA cpu.currently_dma = false; //for next dmc dma @@ -926,7 +927,7 @@ static void cpuDoDMA() } else //WRITE on odd, possibly another dummy read { - if(!cpu.oam_dma || cpu.oam_dma_pause) + if(!(cpu.dma&CPU_OAM_DMA) || cpu.oam_dma_pause) { cpuDMATryHalt(); return; @@ -943,7 +944,7 @@ static void cpuDoDMA() { cpu.dmc_halted = true; //fully ignore OAM DMA if this is out 2nd/3rd attempt - if(cpu.oam_dma && cpu.dmc_halt_attempt) + if((cpu.dma&CPU_OAM_DMA) && cpu.dmc_halt_attempt) cpu.oam_dma_pause = true; else //was first halt attempt, possibly allows next OAM DMA cycle cpu.oam_dma_pause = false; @@ -953,7 +954,7 @@ static void cpuDoDMA() } } } - if(cpu.oam_dma) + if(cpu.dma&CPU_OAM_DMA) { if(cpu.currently_dma && cpu.oam_halted) { @@ -975,7 +976,7 @@ static void cpuDoDMA() if(cpu.oam_dma_ptr == 0) { //printf("OAM DMA Done\n"); - cpu.oam_dma = false; + cpu.dma &= ~CPU_OAM_DMA; if(!cpu.dmc_halted) //done with DMA cpu.currently_dma = false; cpu.oam_halted = false; @@ -991,14 +992,17 @@ static void cpuDoDMA() } /* Main CPU Interpreter */ -bool cpuCycle() +FIXNES_ALWAYSINLINE bool cpuCycle() { cpu_odd_cycle^=true; //printf("CPU Cycle\n"); //do DMC and OAM DMA first - cpuDoDMA(); - if(cpu.currently_dma) - return true; + if(cpu.dma) + { + cpuDoDMA(); + if(cpu.currently_dma) + return true; + } uint8_t instr, cpu_action; cpu_action = cpu.action_arr[cpu.arr_pos]; cpu.arr_pos++; @@ -1602,17 +1606,17 @@ void cpuSoftReset() void cpuDoOAM_DMA(uint16_t addr, uint8_t val) { (void)addr; - cpu.oam_dma = true; + cpu.dma |= CPU_OAM_DMA; cpu.oam_dma_addr = (val<<8); } void cpuDoDMC_DMA(uint16_t addr) { - cpu.dmc_dma = true; + cpu.dma |= CPU_DMC_DMA; cpu.dmc_dma_addr = addr; } bool cpuInDMC_DMA() { - return cpu.dmc_dma; + return !!(cpu.dma&CPU_DMC_DMA); } diff --git a/cpu.h b/cpu.h index 6f0cacf..4eb75dc 100644 --- a/cpu.h +++ b/cpu.h @@ -8,12 +8,14 @@ #ifndef _cpu_h_ #define _cpu_h_ +#include "common.h" + void cpuInit(); void cpuInitNSF(uint16_t addr, uint8_t newA, uint8_t newX); void cpuStartPlayNSF(); void cpuEndPlayNSF(); void cpuSoftReset(); -bool cpuCycle(); +FIXNES_ALWAYSINLINE bool cpuCycle(); void cpuDoOAM_DMA(uint16_t addr, uint8_t val); void cpuDoDMC_DMA(uint16_t addr); bool cpuInDMC_DMA(); diff --git a/libretro/Makefile b/libretro/Makefile index 3730369..4c2007a 100644 --- a/libretro/Makefile +++ b/libretro/Makefile @@ -1,8 +1,9 @@ TARGET_NAME := fixnes DEBUG = 0 -AUDIO_FLOAT = 0 +AUDIO_LOWERFREQ = 0 DO_LTO = 0 +DO_INLINE_ATTRIBS = 0 ifeq ($(platform),) platform = unix @@ -221,7 +222,9 @@ else ifeq ($(platform), ngc) PLATFORM_DEFINES += -DGEKKO -DHW_DOL -mrvl -mcpu=750 -meabi -mhard-float PLATFORM_DEFINES += -U__INT32_TYPE__ -U __UINT32_TYPE__ -D__INT32_TYPE__=int STATIC_LINKING = 1 + AUDIO_LOWERFREQ = 1 DO_LTO = 1 + DO_INLINE_ATTRIBS = 1 # Nintendo Wii else ifeq ($(platform), wii) @@ -233,6 +236,7 @@ else ifeq ($(platform), wii) PLATFORM_DEFINES += -U__INT32_TYPE__ -U __UINT32_TYPE__ -D__INT32_TYPE__=int STATIC_LINKING = 1 DO_LTO = 1 + DO_INLINE_ATTRIBS = 1 # Nintendo WiiU else ifeq ($(platform), wiiu) @@ -243,6 +247,7 @@ else ifeq ($(platform), wiiu) PLATFORM_DEFINES += -DGEKKO -DWIIU -DHW_RVL -mwup -mcpu=750 -meabi -mhard-float PLATFORM_DEFINES += -U__INT32_TYPE__ -U __UINT32_TYPE__ -D__INT32_TYPE__=int STATIC_LINKING = 1 + DO_INLINE_ATTRIBS = 1 # ARM else ifneq (,$(findstring armv,$(platform))) @@ -294,8 +299,8 @@ else endif -ifeq ($(AUDIO_FLOAT),1) -CFLAGS += -DAUDIO_FLOAT=1 +ifeq ($(AUDIO_LOWERFREQ),1) +CFLAGS += -DAUDIO_LOWERFREQ=1 endif ifeq ($(DO_LTO),1) @@ -303,6 +308,10 @@ CFLAGS += -flto CXXFLAGS += -flto endif +ifeq ($(DO_INLINE_ATTRIBS),1) +CFLAGS += -DDO_INLINE_ATTRIBS=1 -Wno-attributes +endif + ifeq ($(DEBUG), 1) CFLAGS += -O0 -g CXXFLAGS += -O0 -g diff --git a/main.c b/main.c index 1c7edca..760249b 100644 --- a/main.c +++ b/main.c @@ -38,7 +38,7 @@ #define DEBUG_KEY 0 #define DEBUG_LOAD_INFO 1 -const char *VERSION_STRING = "fixNES Alpha v1.2.1"; +const char *VERSION_STRING = "fixNES Alpha v1.2.2"; static char window_title[256]; static char window_title_pause[256]; diff --git a/ppu.c b/ppu.c index f67591b..b797ea7 100644 --- a/ppu.c +++ b/ppu.c @@ -468,22 +468,22 @@ static void saveSprite(uint8_t p0, uint8_t p1) ppu.OAM2Ptr += 4; } -static void updateBGTileAddress() +FIXNES_ALWAYSINLINE inline static void updateBGTileAddress() { if((ppu.VramAddr & 0x1F) == 0x1F) ppu.VramAddr ^= 0x41F; else ppu.VramAddr++; } -static void updateBGHoriAddress() +FIXNES_ALWAYSINLINE inline static void updateBGHoriAddress() { ppu.VramAddr = (ppu.VramAddr & (~PPU_VRAM_HORIZONTAL_MASK)) | (ppu.TmpVramAddr & PPU_VRAM_HORIZONTAL_MASK); } -static void updateBGVertAddress() +FIXNES_ALWAYSINLINE inline static void updateBGVertAddress() { ppu.VramAddr = (ppu.VramAddr & (~PPU_VRAM_VERTICAL_MASK)) | (ppu.TmpVramAddr & PPU_VRAM_VERTICAL_MASK); } -static void updateBGYAddress() +FIXNES_ALWAYSINLINE inline static void updateBGYAddress() { /* update Y position for writes */ if((ppu.VramAddr & 0x7000) != (7<<12)) @@ -496,13 +496,13 @@ static void updateBGYAddress() } } -static void setNTAddr() +FIXNES_ALWAYSINLINE inline static void setNTAddr() { ppu.NextAddr = (ppu.VramAddr & 0xFFF) | 0x2000; } extern void mmc5setTile(uint16_t dot); -static void getNTAddr(uint16_t dot) +FIXNES_ALWAYSINLINE inline static void getNTAddr(uint16_t dot) { /* MMC5 Scanline/Scroll Related */ if(ppuMapper5) mmc5setTile(dot); @@ -512,40 +512,40 @@ static void getNTAddr(uint16_t dot) ppu.NextTile = chrROMBG+(ntByte<<4)+curTileY; } -static void setATAddr() +FIXNES_ALWAYSINLINE inline static void setATAddr() { ppu.NextAddr = (ppu.VramAddr & 0xC00) | 0x23C0 | ((ppu.VramAddr>>4)&0x38) | ((ppu.VramAddr>>2)&7); } -static void getATAddr() +FIXNES_ALWAYSINLINE inline static void getATAddr() { /* Select new BG Background Attribute */ ppu.BGAttribReg = memPPUGet8(ppu.NextAddr) >> ((ppu.VramAddr & 0x2) | (ppu.VramAddr >> 4 & 0x4)); } -static void setTileAddr(uint8_t add) +FIXNES_ALWAYSINLINE inline static void setTileAddr(uint8_t add) { ppu.NextAddr = ppu.NextTile+add; } -static void getBGTileAddrLow() +FIXNES_ALWAYSINLINE inline static void getBGTileAddrLow() { mapperChrMode = 0; uint8_t tmp = memPPUGet8(ppu.NextAddr); ppu.BGRegB = tmp >> 0 & 0x55; ppu.BGRegA = tmp >> 1 & 0x55; } -static void getBGTileAddrHigh() +FIXNES_ALWAYSINLINE inline static void getBGTileAddrHigh() { mapperChrMode = 0; uint8_t tmp = memPPUGet8(ppu.NextAddr); ppu.BGRegA |= tmp << 0 & 0xAA; ppu.BGRegB |= tmp << 1 & 0xAA; } -static uint8_t getSpriteTileAddr() +FIXNES_ALWAYSINLINE inline static uint8_t getSpriteTileAddr() { mapperChrMode = 1; return memPPUGet8(ppu.NextAddr); } -static void spriteEvalInit() +FIXNES_ALWAYSINLINE inline static void spriteEvalInit() { ppu.OAMzSpritePos = ppu.OAMpos; ppu.OAM2pos = 0; @@ -553,7 +553,7 @@ static void spriteEvalInit() ppu.spriteEvalB = spriteEvalB_P1; } -static void spriteEvalA() +FIXNES_ALWAYSINLINE inline static void spriteEvalA() { ppu.TmpOAMVal = ppu.OAM[(ppu.OAMpos+ppu.OAMcpPos)&0xFF]; //printf("%i %i %i %02x\n", ppu.OAMpos, ppu.OAMcpPos, (ppu.OAMpos+ppu.OAMcpPos)&0xFF, ppu.TmpOAMVal); @@ -702,17 +702,17 @@ static void spriteEvalB_P9(uint16_t line) ppu.OAMpos += 4; } -static void initSpriteTileFetch() +FIXNES_ALWAYSINLINE inline static void initSpriteTileFetch() { ppu.OAM2Ptr = ppu.OAM2; } -static void resetOAMPos() +FIXNES_ALWAYSINLINE inline static void resetOAMPos() { ppu.OAMpos = 0; } -static void ClearOAM2Byte() +FIXNES_ALWAYSINLINE inline static void ClearOAM2Byte() { ppu.OAM2[ppu.OAM2pos++] = 0xFF; } @@ -744,7 +744,7 @@ static uint16_t ppuLastDot(uint16_t line) return line; } -void ppuCycle() +FIXNES_ALWAYSINLINE void ppuCycle() { uint16_t dot = ppu.curDot, line = ppu.curLine, drawPos; uint8_t curCol; @@ -1635,7 +1635,7 @@ static uint8_t ppuDoSprites(uint8_t color, uint16_t dot) return color; } -bool ppuDrawDone() +FIXNES_ALWAYSINLINE bool ppuDrawDone() { if(ppu.FrameDone) { diff --git a/ppu.h b/ppu.h index 928aae9..b66a631 100644 --- a/ppu.h +++ b/ppu.h @@ -8,10 +8,12 @@ #ifndef _ppu_h_ #define _ppu_h_ +#include "common.h" + void ppuInit(); void ppuAddCycles(); -void ppuCycle(); -bool ppuDrawDone(); +FIXNES_ALWAYSINLINE void ppuCycle(); +FIXNES_ALWAYSINLINE bool ppuDrawDone(); uint8_t ppuGet8(uint16_t addr); void ppuSet8(uint16_t addr, uint8_t val); uint8_t ppuNMI();