Jump to content

need help optimising C code


mess
 Share

Recommended Posts

Can somebody help me optimising this function in assembler?

I don't have enough experience with ASM to do this myself right now

C code:

void DSEQ_PlayStep(unsigned char trk)
{
	unsigned char mask_current_step, step_byte_pos, mask_current_trk;
	// set trk bit mask to track
	mask_current_trk = 0x80 >> trk;
	// calculate step bit (current pos modulo 8)
	mask_current_step = MIOS_HLP_GetBitORMask(0x07 - (pos[trk]&0x07));
	// calculate step byte (current pos /8 + tracknr * 4)
	step_byte_pos = (pos[trk] >> 3) + (trk << 2);

	//handle  note off from previous step
	if (trk_last_trig&mask_current_trk) {
		//set last trig to false
		trk_last_trig &= (~mask_current_trk);
		//send midi note (midi channel one is hardcoded)
		MIOS_MIDI_TxBufferPut(0x90);
		MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]);
		MIOS_MIDI_TxBufferPut(0x00);
	}
	//handle note on from current step
	if ((buffer[step_byte_pos]&mask_current_step)
	  &&(mute_trk&mask_current_trk)) {
		//set last trig to true
		trk_last_trig |= mask_current_trk;
		// Note Event at channel #1 note nr in trk_note_nr[trk]
		MIOS_MIDI_TxBufferPut(0x90);
		MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]);
		if (buffer[step_byte_pos+DSEQ_ACC_OFFSET]&mask_current_step) {
			MIOS_MIDI_TxBufferPut(buffer[DSEQ_ACCVEL_OFFSET]);
		} else {
			MIOS_MIDI_TxBufferPut(buffer[DSEQ_NRMVEL_OFFSET]);
		}
	}
}
sdcc assembler output:
; ; Starting pCode block
S_dseq__DSEQ_PlayStep	code
_DSEQ_PlayStep:
;	.line	99; dseq.c	void DSEQ_PlayStep(unsigned char trk)
	MOVFF	FSR2L, POSTDEC1
	MOVFF	FSR1L, FSR2L
	MOVFF	r0x00, POSTDEC1
	MOVFF	r0x01, POSTDEC1
	MOVFF	r0x02, POSTDEC1
	MOVFF	r0x03, POSTDEC1
	MOVFF	r0x04, POSTDEC1
	MOVFF	r0x05, POSTDEC1
	MOVFF	r0x06, POSTDEC1
	MOVFF	r0x07, POSTDEC1
	MOVFF	r0x08, POSTDEC1
	MOVLW	0x02
	MOVFF	PLUSW2, r0x00
;	.line	103; dseq.c	mask_current_trk = 0x80 >> trk;
	MOVLW	0x80
	MOVWF	r0x01
	MOVF	r0x00, W
	BZ	_00135_DS_
	NEGF	WREG
	BCF	STATUS, 0
_00136_DS_:
	RRCF	r0x01, F
	ADDLW	0x01
	BNC	_00136_DS_
_00135_DS_:
;	.line	105; dseq.c	mask_current_step = MIOS_HLP_GetBitORMask(0x07 - (pos[trk]&0x07));
	MOVLW	LOW(_pos)
	ADDWF	r0x00, W
	MOVWF	r0x02
	CLRF	r0x03
	MOVLW	HIGH(_pos)
	ADDWFC	r0x03, F
	MOVFF	r0x02, FSR0L
	MOVFF	r0x03, FSR0H
	MOVFF	INDF0, r0x02
	MOVLW	0x07
	ANDWF	r0x02, F
	MOVF	r0x02, W
	SUBLW	0x07
	MOVWF	r0x02
	MOVF	r0x02, W
	CALL	_MIOS_HLP_GetBitORMask
	MOVWF	r0x02
;	.line	107; dseq.c	step_byte_pos = (pos[trk] >> 3) + (trk << 2);
	MOVLW	LOW(_pos)
	ADDWF	r0x00, W
	MOVWF	r0x04
	CLRF	r0x05
	MOVLW	HIGH(_pos)
	ADDWFC	r0x05, F
	MOVFF	r0x04, FSR0L
	MOVFF	r0x05, FSR0H
	MOVFF	INDF0, r0x04
	SWAPF	r0x04, W
	MOVWF	r0x04
	RLNCF	r0x04, W
	ANDLW	0x1f
	MOVWF	r0x04
	RLCF	r0x00, W
	ANDLW	0x7e
	MOVWF	r0x06
	ADDWF	r0x06, F
	MOVF	r0x06, W
	ADDWF	r0x04, F
;	.line	110; dseq.c	if (trk_last_trig&mask_current_trk) {
	MOVF	r0x01, W
	BANKSEL	_trk_last_trig
	ANDWF	_trk_last_trig, W, B
	MOVWF	r0x06
	MOVF	r0x06, W
	BZ	_00122_DS_
;	.line	112; dseq.c	trk_last_trig &= (~mask_current_trk);
	COMF	r0x01, W
	MOVWF	r0x06
	MOVF	r0x06, W
; removed redundant BANKSEL
	ANDWF	_trk_last_trig, F, B
;	.line	114; dseq.c	MIOS_MIDI_TxBufferPut(0x90);
	MOVLW	0x90
	CALL	_MIOS_MIDI_TxBufferPut
;	.line	115; dseq.c	MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]);
	MOVLW	0x60
	ADDWF	r0x00, W
	MOVWF	r0x06
	MOVLW	LOW(_buffer)
	ADDWF	r0x06, F
	MOVLW	HIGH(_buffer)
	CLRF	r0x07
	ADDWFC	r0x07, F
	MOVFF	r0x06, FSR0L
	MOVFF	r0x07, FSR0H
	MOVFF	INDF0, r0x06
	MOVF	r0x06, W
	CALL	_MIOS_MIDI_TxBufferPut
;	.line	116; dseq.c	MIOS_MIDI_TxBufferPut(0x00);
	MOVLW	0x00
	CALL	_MIOS_MIDI_TxBufferPut
_00122_DS_:
;	.line	119; dseq.c	if ((buffer[step_byte_pos]&mask_current_step)
	MOVLW	LOW(_buffer)
	ADDWF	r0x04, W
	MOVWF	r0x06
	CLRF	r0x08
	MOVLW	HIGH(_buffer)
	ADDWFC	r0x08, F
	MOVFF	r0x06, FSR0L
	MOVFF	r0x08, FSR0H
	MOVFF	INDF0, r0x06
	MOVF	r0x02, W
	ANDWF	r0x06, F
	MOVF	r0x06, W
	BZ	_00129_DS_
;	.line	120; dseq.c	&&(mute_trk&mask_current_trk)) {
	MOVF	r0x01, W
	BANKSEL	_mute_trk
	ANDWF	_mute_trk, W, B
	MOVWF	r0x06
	MOVF	r0x06, W
	BZ	_00129_DS_
;	.line	123; dseq.c	trk_last_trig |= mask_current_trk;
	MOVF	r0x01, W
	BANKSEL	_trk_last_trig
	IORWF	_trk_last_trig, F, B
;	.line	125; dseq.c	MIOS_MIDI_TxBufferPut(0x90);
	MOVLW	0x90
	CALL	_MIOS_MIDI_TxBufferPut
;	.line	126; dseq.c	MIOS_MIDI_TxBufferPut(buffer[DSEQ_NOTENR_OFFSET+trk]);
	MOVLW	0x60
	ADDWF	r0x00, F
	MOVLW	LOW(_buffer)
	ADDWF	r0x00, F
	MOVLW	HIGH(_buffer)
	CLRF	r0x01
	ADDWFC	r0x01, F
	MOVFF	r0x00, FSR0L
	MOVFF	r0x01, FSR0H
	MOVFF	INDF0, r0x00
	MOVF	r0x00, W
	CALL	_MIOS_MIDI_TxBufferPut
;	.line	127; dseq.c	if (buffer[step_byte_pos+DSEQ_ACC_OFFSET]&mask_current_step) {
	MOVLW	0x20
	ADDWF	r0x04, F
	MOVLW	LOW(_buffer)
	ADDWF	r0x04, F
	MOVLW	HIGH(_buffer)
	CLRF	r0x00
	ADDWFC	r0x00, F
	MOVFF	r0x04, FSR0L
	MOVFF	r0x00, FSR0H
	MOVFF	INDF0, r0x04
	MOVF	r0x04, W
	ANDWF	r0x02, F
	MOVF	r0x02, W
	BZ	_00124_DS_
	BANKSEL	(_buffer + 122)
;	.line	128; dseq.c	MIOS_MIDI_TxBufferPut(buffer[DSEQ_ACCVEL_OFFSET]);
	MOVF	(_buffer + 122), W, B
	CALL	_MIOS_MIDI_TxBufferPut
	BRA	_00129_DS_
_00124_DS_:
	BANKSEL	(_buffer + 121)
;	.line	130; dseq.c	MIOS_MIDI_TxBufferPut(buffer[DSEQ_NRMVEL_OFFSET]);
	MOVF	(_buffer + 121), W, B
	CALL	_MIOS_MIDI_TxBufferPut
_00129_DS_:
	MOVFF	PREINC1, r0x08
	MOVFF	PREINC1, r0x07
	MOVFF	PREINC1, r0x06
	MOVFF	PREINC1, r0x05
	MOVFF	PREINC1, r0x04
	MOVFF	PREINC1, r0x03
	MOVFF	PREINC1, r0x02
	MOVFF	PREINC1, r0x01
	MOVFF	PREINC1, r0x00
	MOVFF	PREINC1, FSR2L
	RETURN	

Thank you,

Michaël

Link to comment
Share on other sites

I wouldn't write this assembler, this won't change that much for such a short routine, maybe it currently takes ca. 10 uS, this is nothing - so why spending the effort? It can take hours or days to get this working together for somebody w/o mixed C/assembly skills - just see moxi's thread about the AOUT_LC integration into analog toolbox.

It maybe makes sense to optimize the way how you are addressing the bits - but in C.

E.g., it seems that your track triggers are counted from MSB first (mask_current_trk = 0x80 >> trk).

This is very uncommon and produces unnecesary code, regardless if written in C or assembler.

If you would organise it with LSB first, you can use the MIOS_HLP_GetBitANDMask function (which is assembly optimized) and you will get the result much faster:

-> mask_current_trk = MIOS_HLP_GetBitANDMask(trk);

The same for the step bit:

mask_current_step = MIOS_HLP_GetBitORMask(pos[trk]&0x07);

There isn't really much more to optimize...

Link to comment
Share on other sites

Hi Thorsten,

thank you for the fast response

the reason why I organised the bits this way

is to make the displaying of the steps easier (and hopefully faster):

cs handler snippet:

	
       switch (cs_layer) {
		case CS_LAYER_NRM:
			//display steps, green leds = norm vel green+red leds = accent
			ledmatrix[0] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 0];
			ledmatrix[1] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 1];
			ledmatrix[2] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 2];
			ledmatrix[3] = buffer[DSEQ_TRIG_OFFSET + trk_offset + 3];
			ledmatrix[4] = buffer[DSEQ_ACC_OFFSET + trk_offset + 0];
			ledmatrix[5] = buffer[DSEQ_ACC_OFFSET + trk_offset + 1];
			ledmatrix[6] = buffer[DSEQ_ACC_OFFSET + trk_offset + 2];
			ledmatrix[7] = buffer[DSEQ_ACC_OFFSET + trk_offset + 3];
			break;

so it seems that I have two options:

* reverse the matrix wiring so that the first step led is the LSB and use the MIOS bitmasks

* use a ASM function with a reversed table so that DSEQ_GetStepORMask(0) selects the MSB

  instead of MIOS_HLP_GetBitORMask(pos[trk]&0x07);

I'm going to try the second one first because I don't have to change any hardware for that...

Link to comment
Share on other sites

One thing that I've learned in the last years is: than earlier you are making changes on the hardware to improve the system, than less the effort at the end. You definitely want to do this sooner or later ;-)

However, so long you are using C, I would propose to use an constant array of 8 bytes instead of a DSEQ_GetStepORMask(0) function - table accesses are faster, since there is no overhead for saving/restoring the stack.

Best Regards, Thorsten.

Link to comment
Share on other sites

One thing that I've learned in the last years is: than earlier you are making changes on the hardware to improve the system, than less the effort at the end. You definitely want to do this sooner or later ;-)

I will have to resolder my connections with the matrix when the case is finished,

so that seems like a good moment to do so :)

I couldn't help trying to code the function in ASM (based on the original off coarse)

it seems to work fine...

unsigned char step;
unsigned char DSEQ_HLP_GetStepORMask(unsigned char step_pos) __wparam
{
	step = step_pos;
__asm;
	btfsc	_step, 2
	bra 	DSEQ_HLP_GetStepORMask_4567

DSEQ_HLP_GetStepORMask_0123:
	btfsc	_step, 1
	bra 	DSEQ_HLP_GetStepORMask_23
DSEQ_HLP_GetStepORMask_01:
	btfss	_step, 0
	retlw   b'10000000'
	retlw	b'01000000'
DSEQ_HLP_GetStepORMask_23:
	btfss	_step, 0
	retlw   b'00100000'
	retlw	b'00010000'

DSEQ_HLP_GetStepORMask_4567:
	btfsc	_step, 1
	bra 	DSEQ_HLP_GetStepORMask_67
DSEQ_HLP_GetStepORMask_45:
	btfss	_step, 0
	retlw	b'00001000'
	retlw	b'00000100'
DSEQ_HLP_GetStepORMask_67:
	btfss	_step, 0
	retlw   b'00000010'
	retlw	b'00000001'
__endasm;

}

Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

Loading...
 Share

×
×
  • Create New...