mess Posted August 29, 2006 Report Share Posted August 29, 2006 Can somebody help me optimising this function in assembler?I don't have enough experience with ASM to do this myself right nowC code: void DSEQ_PlayStep(unsigned char trk) { unsigned char mask_current_step, step_byte_pos, mask_current_trk; // set trk bit mask to track mask_current_trk = 0x80 >> trk; // calculate step bit (current pos modulo 8) mask_current_step = MIOS_HLP_GetBitORMask(0x07 - (pos[trk]&0x07)); // calculate step byte (current pos /8 + tracknr * 4) step_byte_pos = (pos[trk] >> 3) + (trk << 2); //handle note off from previous step if (trk_last_trig&mask_current_trk) { //set last trig to false trk_last_trig &= (~mask_current_trk); //send midi note (midi channel one is hardcoded) MIOS_MIDI_TxBufferPut(0x90); MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]); MIOS_MIDI_TxBufferPut(0x00); } //handle note on from current step if ((buffer[step_byte_pos]&mask_current_step) &&(mute_trk&mask_current_trk)) { //set last trig to true trk_last_trig |= mask_current_trk; // Note Event at channel #1 note nr in trk_note_nr[trk] MIOS_MIDI_TxBufferPut(0x90); MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]); if (buffer[step_byte_pos+DSEQ_ACC_OFFSET]&mask_current_step) { MIOS_MIDI_TxBufferPut(buffer[DSEQ_ACCVEL_OFFSET]); } else { MIOS_MIDI_TxBufferPut(buffer[DSEQ_NRMVEL_OFFSET]); } } } sdcc assembler output: ; ; Starting pCode block S_dseq__DSEQ_PlayStep code _DSEQ_PlayStep: ; .line 99; dseq.c void DSEQ_PlayStep(unsigned char trk) MOVFF FSR2L, POSTDEC1 MOVFF FSR1L, FSR2L MOVFF r0x00, POSTDEC1 MOVFF r0x01, POSTDEC1 MOVFF r0x02, POSTDEC1 MOVFF r0x03, POSTDEC1 MOVFF r0x04, POSTDEC1 MOVFF r0x05, POSTDEC1 MOVFF r0x06, POSTDEC1 MOVFF r0x07, POSTDEC1 MOVFF r0x08, POSTDEC1 MOVLW 0x02 MOVFF PLUSW2, r0x00 ; .line 103; dseq.c mask_current_trk = 0x80 >> trk; MOVLW 0x80 MOVWF r0x01 MOVF r0x00, W BZ _00135_DS_ NEGF WREG BCF STATUS, 0 _00136_DS_: RRCF r0x01, F ADDLW 0x01 BNC _00136_DS_ _00135_DS_: ; .line 105; dseq.c mask_current_step = MIOS_HLP_GetBitORMask(0x07 - (pos[trk]&0x07)); MOVLW LOW(_pos) ADDWF r0x00, W MOVWF r0x02 CLRF r0x03 MOVLW HIGH(_pos) ADDWFC r0x03, F MOVFF r0x02, FSR0L MOVFF r0x03, FSR0H MOVFF INDF0, r0x02 MOVLW 0x07 ANDWF r0x02, F MOVF r0x02, W SUBLW 0x07 MOVWF r0x02 MOVF r0x02, W CALL _MIOS_HLP_GetBitORMask MOVWF r0x02 ; .line 107; dseq.c step_byte_pos = (pos[trk] >> 3) + (trk << 2); MOVLW LOW(_pos) ADDWF r0x00, W MOVWF r0x04 CLRF r0x05 MOVLW HIGH(_pos) ADDWFC r0x05, F MOVFF r0x04, FSR0L MOVFF r0x05, FSR0H MOVFF INDF0, r0x04 SWAPF r0x04, W MOVWF r0x04 RLNCF r0x04, W ANDLW 0x1f MOVWF r0x04 RLCF r0x00, W ANDLW 0x7e MOVWF r0x06 ADDWF r0x06, F MOVF r0x06, W ADDWF r0x04, F ; .line 110; dseq.c if (trk_last_trig&mask_current_trk) { MOVF r0x01, W BANKSEL _trk_last_trig ANDWF _trk_last_trig, W, B MOVWF r0x06 MOVF r0x06, W BZ _00122_DS_ ; .line 112; dseq.c trk_last_trig &= (~mask_current_trk); COMF r0x01, W MOVWF r0x06 MOVF r0x06, W ; removed redundant BANKSEL ANDWF _trk_last_trig, F, B ; .line 114; dseq.c MIOS_MIDI_TxBufferPut(0x90); MOVLW 0x90 CALL _MIOS_MIDI_TxBufferPut ; .line 115; dseq.c MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]); MOVLW 0x60 ADDWF r0x00, W MOVWF r0x06 MOVLW LOW(_buffer) ADDWF r0x06, F MOVLW HIGH(_buffer) CLRF r0x07 ADDWFC r0x07, F MOVFF r0x06, FSR0L MOVFF r0x07, FSR0H MOVFF INDF0, r0x06 MOVF r0x06, W CALL _MIOS_MIDI_TxBufferPut ; .line 116; dseq.c MIOS_MIDI_TxBufferPut(0x00); MOVLW 0x00 CALL _MIOS_MIDI_TxBufferPut _00122_DS_: ; .line 119; dseq.c if ((buffer[step_byte_pos]&mask_current_step) MOVLW LOW(_buffer) ADDWF r0x04, W MOVWF r0x06 CLRF r0x08 MOVLW HIGH(_buffer) ADDWFC r0x08, F MOVFF r0x06, FSR0L MOVFF r0x08, FSR0H MOVFF INDF0, r0x06 MOVF r0x02, W ANDWF r0x06, F MOVF r0x06, W BZ _00129_DS_ ; .line 120; dseq.c &&(mute_trk&mask_current_trk)) { MOVF r0x01, W BANKSEL _mute_trk ANDWF _mute_trk, W, B MOVWF r0x06 MOVF r0x06, W BZ _00129_DS_ ; .line 123; dseq.c trk_last_trig |= mask_current_trk; MOVF r0x01, W BANKSEL _trk_last_trig IORWF _trk_last_trig, F, B ; .line 125; dseq.c MIOS_MIDI_TxBufferPut(0x90); MOVLW 0x90 CALL _MIOS_MIDI_TxBufferPut ; .line 126; dseq.c MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]); MOVLW 0x60 ADDWF r0x00, F MOVLW LOW(_buffer) ADDWF r0x00, F MOVLW HIGH(_buffer) CLRF r0x01 ADDWFC r0x01, F MOVFF r0x00, FSR0L MOVFF r0x01, FSR0H MOVFF INDF0, r0x00 MOVF r0x00, W CALL _MIOS_MIDI_TxBufferPut ; .line 127; dseq.c if (buffer[step_byte_pos+DSEQ_ACC_OFFSET]&mask_current_step) { MOVLW 0x20 ADDWF r0x04, F MOVLW LOW(_buffer) ADDWF r0x04, F MOVLW HIGH(_buffer) CLRF r0x00 ADDWFC r0x00, F MOVFF r0x04, FSR0L MOVFF r0x00, FSR0H MOVFF INDF0, r0x04 MOVF r0x04, W ANDWF r0x02, F MOVF r0x02, W BZ _00124_DS_ BANKSEL (_buffer + 122) ; .line 128; dseq.c MIOS_MIDI_TxBufferPut(buffer[DSEQ_ACCVEL_OFFSET]); MOVF (_buffer + 122), W, B CALL _MIOS_MIDI_TxBufferPut BRA _00129_DS_ _00124_DS_: BANKSEL (_buffer + 121) ; .line 130; dseq.c MIOS_MIDI_TxBufferPut(buffer[DSEQ_NRMVEL_OFFSET]); MOVF (_buffer + 121), W, B CALL _MIOS_MIDI_TxBufferPut _00129_DS_: MOVFF PREINC1, r0x08 MOVFF PREINC1, r0x07 MOVFF PREINC1, r0x06 MOVFF PREINC1, r0x05 MOVFF PREINC1, r0x04 MOVFF PREINC1, r0x03 MOVFF PREINC1, r0x02 MOVFF PREINC1, r0x01 MOVFF PREINC1, r0x00 MOVFF PREINC1, FSR2L RETURN Thank you,Michaël Quote Link to comment Share on other sites More sharing options...
TK. Posted August 29, 2006 Report Share Posted August 29, 2006 I wouldn't write this assembler, this won't change that much for such a short routine, maybe it currently takes ca. 10 uS, this is nothing - so why spending the effort? It can take hours or days to get this working together for somebody w/o mixed C/assembly skills - just see moxi's thread about the AOUT_LC integration into analog toolbox.It maybe makes sense to optimize the way how you are addressing the bits - but in C. E.g., it seems that your track triggers are counted from MSB first (mask_current_trk = 0x80 >> trk).This is very uncommon and produces unnecesary code, regardless if written in C or assembler. If you would organise it with LSB first, you can use the MIOS_HLP_GetBitANDMask function (which is assembly optimized) and you will get the result much faster:-> mask_current_trk = MIOS_HLP_GetBitANDMask(trk);The same for the step bit:mask_current_step = MIOS_HLP_GetBitORMask(pos[trk]&0x07);There isn't really much more to optimize... Quote Link to comment Share on other sites More sharing options...
mess Posted August 29, 2006 Author Report Share Posted August 29, 2006 Hi Thorsten,thank you for the fast responsethe reason why I organised the bits this way is to make the displaying of the steps easier (and hopefully faster):cs handler snippet: switch (cs_layer) { case CS_LAYER_NRM: //display steps, green leds = norm vel green+red leds = accent ledmatrix[0] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 0]; ledmatrix[1] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 1]; ledmatrix[2] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 2]; ledmatrix[3] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 3]; ledmatrix[4] = buffer[DSEQ_ACC_OFFSET + trk_offset + 0]; ledmatrix[5] = buffer[DSEQ_ACC_OFFSET + trk_offset + 1]; ledmatrix[6] = buffer[DSEQ_ACC_OFFSET + trk_offset + 2]; ledmatrix[7] = buffer[DSEQ_ACC_OFFSET + trk_offset + 3]; break; so it seems that I have two options:* reverse the matrix wiring so that the first step led is the LSB and use the MIOS bitmasks* use a ASM function with a reversed table so that DSEQ_GetStepORMask(0) selects the MSB instead of MIOS_HLP_GetBitORMask(pos[trk]&0x07);I'm going to try the second one first because I don't have to change any hardware for that... Quote Link to comment Share on other sites More sharing options...
TK. Posted August 29, 2006 Report Share Posted August 29, 2006 One thing that I've learned in the last years is: than earlier you are making changes on the hardware to improve the system, than less the effort at the end. You definitely want to do this sooner or later ;-)However, so long you are using C, I would propose to use an constant array of 8 bytes instead of a DSEQ_GetStepORMask(0) function - table accesses are faster, since there is no overhead for saving/restoring the stack.Best Regards, Thorsten. Quote Link to comment Share on other sites More sharing options...
mess Posted August 29, 2006 Author Report Share Posted August 29, 2006 One thing that I've learned in the last years is: than earlier you are making changes on the hardware to improve the system, than less the effort at the end. You definitely want to do this sooner or later ;-)I will have to resolder my connections with the matrix when the case is finished,so that seems like a good moment to do so :)I couldn't help trying to code the function in ASM (based on the original off coarse)it seems to work fine... unsigned char step; unsigned char DSEQ_HLP_GetStepORMask(unsigned char step_pos) __wparam { step = step_pos; __asm; btfsc _step, 2 bra DSEQ_HLP_GetStepORMask_4567 DSEQ_HLP_GetStepORMask_0123: btfsc _step, 1 bra DSEQ_HLP_GetStepORMask_23 DSEQ_HLP_GetStepORMask_01: btfss _step, 0 retlw b'10000000' retlw b'01000000' DSEQ_HLP_GetStepORMask_23: btfss _step, 0 retlw b'00100000' retlw b'00010000' DSEQ_HLP_GetStepORMask_4567: btfsc _step, 1 bra DSEQ_HLP_GetStepORMask_67 DSEQ_HLP_GetStepORMask_45: btfss _step, 0 retlw b'00001000' retlw b'00000100' DSEQ_HLP_GetStepORMask_67: btfss _step, 0 retlw b'00000010' retlw b'00000001' __endasm; } Quote Link to comment Share on other sites More sharing options...
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.