%if 0

lDebug heatshrink/lzexedat depacker

Copyright (C) 2008-2025 E. C. Masloch

Usage of the works is permitted provided that this
instrument is retained with the works, so that any entity
that uses the works is notified of this instrument.

DISCLAIMER: THE WORKS ARE WITHOUT WARRANTY.

%endif

%include "lmacros3.mac"


		; Build options. numdef / overridedef are provided by
		;  lmacros3.mac; each option NAME is tested below as _NAME.
		;
		; LZEXEDAT		assemble the lzexedat decoder instead of
		;			 the heatshrink decoder
		; DEPACKINLINELITERAL	store literals inline in the main loop
		;			 instead of going through copy_data.bx
		; DXTAG			keep the tag word / bit state in dx and
		;			 the buffer end in ?dst_end; otherwise
		;			 dx holds the buffer end
		; LONGLITERAL		support the lzexedat long literals escape
		; MULTI			select the format at run time from al
		; STANDALONE		build the file I/O glue in this file
		; DATEXE		glue variant using the DATA and BUFFER
		;			 segments and symbols defined elsewhere
	numdef LZEXEDAT,	0
	numdef DEPACKINLINELITERAL,	1
	numdef DXTAG,		1
	numdef LONGLITERAL,	0
	numdef MULTI,		0
	numdef STANDALONE,	1
	numdef DATEXE,		0

%ifn _STANDALONE
		; Embedded build: no error data word, no progress counter.
	overridedef DEBUG0,	0
	overridedef COUNTER,	0
%else
 %ifn _DATEXE
		; The plain test program supports a single format only.
	overridedef MULTI,	0
 %endif
		; DEBUG0 enables the ?errordata frame variable (d0 lines).
	numdef DEBUG0,		0
		; COUNTER nonzero: display a dot every COUNTER iterations.
		;  The counter is masked with _COUNTER - 1, hence the
		;  power-of-two requirement checked below.
	numdef COUNTER,		0, 32
%if (_COUNTER - 1) & _COUNTER
 %error COUNTER must be a power of two
%endif
		; OUTBUFFER nonzero: collect output in outbuffer before
		;  writing it to the output file.
	numdef OUTBUFFER,	1


%ifn _DATEXE
	cpu 8086
	org 256
	addsection CODE, start=256
		; Standalone test program entry.
		;
		; Takes the input file name from the command line, opens it,
		;  determines its size, creates the output file, and falls
		;  through into the common depack setup with cx:dx = size.
testprogram:
	mov si, 81h			; -> command line tail

	cmp sp, stack.top		; stack above our uninitialised data ?
	jae @F				; yes, enough memory -->
		; Common error exit: display message, terminate with
		;  return code 0FFh.
error:
	mov dx, msg.error
	mov ah, 09h
	int 21h
	mov ax, 4CFFh
	int 21h
	int 20h				; fallback if function 4Ch returned

@@:
		; Skip leading blanks (tab or space). A CR here means
		;  no file name was given.
	lodsb
	cmp al, 9
	je @B
	cmp al, 32
	je @B
	cmp al, 13
	je error
	mov dx, si
	dec dx				; dx -> first character of name

@@:
		; Scan to the end of the name (blank or CR).
	lodsb
	cmp al, 9
	je @F
	cmp al, 32
	je @F
	cmp al, 13
	jne @B
@@:
	mov byte [si - 1], 0		; terminate name with a NUL

	mov ax, 3D00h | 1_010_0_000b	; RO, DENY WRITE, No inherit
	int 21h
	jc error
	mov word [handle], ax		; input file handle

	xchg bx, ax			; bx = input handle
	mov ax, 4202h			; seek to end of file
	xor cx, cx
	xor dx, dx
	int 21h
	jc error

	push dx				; high word of file size
	push ax				; low word of file size
	mov ax, 4200h			; seek back to start of file
	xor cx, cx
	xor dx, dx
	int 21h
	jc error

	mov ax, 3C00h			; create output file
	xor cx, cx			; no attributes
	mov dx, msg.outname
	int 21h
	jc error
	mov word [handle2], ax		; output file handle

	pop dx				; = low word of file size
	pop cx				; cx:dx = file size
%else	; _DATEXE
	usesection CODE
		; DATEXE entry. The input size is taken from the insize
		;  variable, which is defined outside this file.
datexe_depack:
	mov cx, word [insize + 2]
	mov dx, word [insize]		; cx:dx = input size
%endif
		; Common setup: mark the input buffer as empty and the
		;  output buffer (if used) as empty.
	mov word [filebuffer.next], filebuffer
	mov word [filebuffer.tail], filebuffer
%if _OUTBUFFER
	mov word [outbuffer.next], outbuffer
%endif

%if _DATEXE
	mov ax, BUFFER
	mov es, ax			; es => segment of the result buffer
%endif
		; Fill the result buffer with 0CCCCh words. (depack itself
		;  zeroes the buffer again before decoding.)
	mov di, resultbuffer
	push di
	push cx
	mov ax, 0CCCCh
	mov cx, words(resultbuffer.end - resultbuffer)
	rep stosw
	pop cx
	pop di				; es:di -> destination for depack
	mov si, resultbuffer.end	; si -> behind end of destination

%if _DATEXE
	mov ax, [format]		; al = format, ah = switch4k for depack
..@breakpoint_d:
	nop
%endif

	call depack			; CY if error
%if _DATEXE
		; depack may have changed ds. mov does not modify the
		;  flags so the Carry Flag from depack survives until sbb.
	mov dx, DATA
	mov ds, dx

	sbb ax, ax			; ax = 0 if NC, -1 if CY
	jz @F				; success -->
	mov dx, msg.decompresserror
	call disp_msg_counted
	mov ax, 4C0Bh
	int 21h
@@:
%if _OUTBUFFER
		; Write what remains in the output buffer.
	mov di, [outbuffer.next]
	call dump_outbuffer
%endif

		; Seek to the end of the output file to learn its size.
	mov bx, word [outhandle]
	mov ax, 4202h
	xor cx, cx
	xor dx, dx
	int 21h
	jc openoutfile.error
	mov word [outsize], ax
	mov word [outsize + 2], dx

	rol byte [verbose], 1		; CY if top bit of verbose is set
	jc display_table_d
		; Short summary: three message parts with the input size
		;  and output size in between, then the percentage.
display_short_d:
	mov dx, msg.short.1
	call disp_msg_counted
	mov ax, word [insize]
	mov dx, word [insize + 2]
	call disp_dxax_dec
	mov dx, msg.short.2
	call disp_msg_counted
	mov ax, word [outsize]
	mov dx, word [outsize + 2]
	call disp_dxax_dec
	mov dx, msg.short.3
	call disp_msg_counted
	call disp_percentage_swapped
	mov ax, 4C00h
	int 21h


		; Verbose summary driven by table_d. Each table entry is
		;  four words: message, address of a dword variable, and
		;  two display functions. Both functions are called with
		;  dx:ax = the variable's value, separated by a blank.
		;  A zero message word ends the table.
		; REM:	The second call relies on the first function
		;	 returning ax and dx unchanged.
display_table_d:
	mov si, table_d
.loop:
	lodsw
	test ax, ax
	jz .done			; end of table -->
	xchg dx, ax			; dx -> message
	call disp_msg_counted
	lodsw
	xchg bx, ax			; bx -> dword variable
	mov cx, [bx]			; low word
	mov dx, [bx + 2]		; high word
	lodsw				; ax = first display function
	xchg cx, ax			; ax = low word, cx = function
	call cx
	xchg cx, ax			; cx = low word again
	mov al, 32
	call disp_al
	lodsw				; ax = second display function
	xchg cx, ax			; ax = low word, cx = function
	call cx
	mov dx, msg.linebreak
	call disp_msg_counted
	jmp .loop

.done:
	mov dx, msg.done.6
	call disp_msg_counted
	call disp_percentage_swapped
	mov dx, msg.done
	call disp_msg_counted
	mov ax, 4C00h
	int 21h
%else	; _DATEXE
	jc error			; depack failed -->

%if _OUTBUFFER
		; Write what remains in the output buffer.
	mov di, [outbuffer.next]
	call dump_outbuffer
%endif

		; depack may have changed ds and es, reset them.
	push ss
	pop ds
	push ss
	pop es

		; Close both files, then terminate with return code 0.
	mov ah, 3Eh
	mov bx, [handle]
	int 21h

	mov ah, 3Eh
	mov bx, [handle2]
	int 21h

	mov ax, 4C00h
	int 21h
	int 20h				; fallback if function 4Ch returned


%if _COUNTER
		; Display a single character on standard output.
		;
		; INP:	al = character to display
		; OUT:	-
		; CHG:	-
		; REM:	The lzexedat paths of depack call this routine by
		;	 the names disp_al_counter and disp_al_for_progress,
		;	 the heatshrink path calls it disp_al. This file
		;	 used to define only disp_al, so a standalone build
		;	 with _LZEXEDAT and _COUNTER did not assemble. All
		;	 three names now refer to the same routine.
disp_al_counter:
disp_al_for_progress:
disp_al:
	push dx
	push ax
	xchg ax, dx			; dl = character
	mov ah, 02h			; DOS display character function
	int 21h
	pop ax
	pop dx
	retn
%endif
%endif	; _DATEXE


		; Get the next byte of the input file, refilling the
		;  file buffer from the file when it is exhausted.
		;
		; INP:	-
		; OUT:	Does not return if error (in this example)
		;	CY if error
		;	NC if success,
		;	 al = byte read from file
		; CHG:	si
		; REM:	In this example program we assume ds = es = ss.
		;	If _DATEXE then ds is saved and set to DATA for
		;	 the duration of this function instead.
		;	A read of zero bytes (end of file) is treated as
		;	 an error.
get_file_byte:
%if _DATEXE
	push ds
	mov si, DATA
	mov ds, si
%endif
	mov si, [filebuffer.next]
	cmp si, [filebuffer.tail]	; any buffered data left ?
	jb .buffered			; yes -->

		; Buffer is empty, read the next chunk of the file.
	push ax
	push bx
	push cx
	push dx
	mov dx, filebuffer
	mov [filebuffer.next], dx
	mov [filebuffer.tail], dx
	mov ah, 3Fh
	mov bx, [handle]
        mov cx, filebuffer.end - filebuffer

	int 21h
%if _DATEXE
	jc getbuf.errorread
	test ax, ax
	jz getbuf.shortread
%else
	jc error
	test ax, ax
	jz error
%endif
	add ax, dx			; -> behind the data that was read
	mov word [filebuffer.tail], ax
        mov si, dx
	pop dx
	pop cx
	pop bx
	pop ax

.buffered:
	lodsb
	mov [filebuffer.next], si
	clc
%if _DATEXE
	pop ds
%endif
	retn


%if _OUTBUFFER
		; Append data to the output buffer. Whenever the buffer
		;  is full it is written to the output file.
		;
		; INP:	ds:si -> data to write
		;	cx = length of data (may be zero)
		; OUT:	Does not return if error
		; CHG:	-
		; REM:	In this example program we assume ds = es = ss.
		;	If _DATEXE then es is saved and set to DATA, and
		;	 the accesses to outbuffer.next carry an es
		;	 segment override prefix (the lines reading "es").
put_file_data:
	push ax
	push bx
	push dx
	push di
	push si
	push cx
%if _DATEXE
	push es
	mov di, DATA
	mov es, di
	es				; prefix for the next instruction
%endif
	mov di, [outbuffer.next]
	jcxz .end			; nothing to store -->
@@:
	cmp di, outbuffer.end		; output buffer full ?
	jne @F				; no -->

	call dump_outbuffer		; write it, di -> start of buffer

@@:
	movsb
	loop @BB
%if _DATEXE
	es				; prefix for the next instruction
%endif
	mov [outbuffer.next], di
.end:
%if _DATEXE
	pop es
%endif
	pop cx
	pop si
	pop di
	pop dx
	pop bx
	pop ax
	retn

		; Write the contents of the output buffer to the output
		;  file and reset the buffer pointer.
		;
		; INP:	di -> behind last used byte of outbuffer
		; OUT:	Does not return if error or short write
		;	di -> outbuffer
		; CHG:	ax, bx, dx
		; REM:	If _DATEXE then ds is saved and set to DATA for
		;	 the duration of this function.
		;	word [outbuffer.next] is not updated here.
dump_outbuffer:
	push cx
%if _DATEXE
	push ds
	mov cx, DATA
	mov ds, cx
%endif
	mov cx, di
	mov dx, outbuffer
	sub cx, dx			; cx = amount of buffered data
	jz @F				; nothing buffered -->
	mov bx, word [handle2]
	mov ah, 40h
	int 21h
%if _DATEXE
	jc putbuf.errorwrite
	cmp ax, cx
	jne putbuf.shortwrite
%else
	jc error
	cmp ax, cx
	jne error
%endif
@@:
	mov di, dx			; -> start of buffer
%if _DATEXE
	pop ds
%endif
	pop cx
	retn
%else
		; Write data directly to the output file (unbuffered
		;  variant, used if _OUTBUFFER is zero).
		;
		; INP:	ds:si -> data to write
		;	cx = length of data (may be zero)
		; OUT:	Does not return if error or short write
		; CHG:	-
		; REM:	In this example program we assume ds = es = ss.
		;	If _DATEXE then the handle is fetched from the
		;	 DATA segment. bx is used as the scratch register
		;	 for that because it is saved on entry and loaded
		;	 with the handle right afterwards. (di must not
		;	 be used: copy_data.mov_and_put depends on di
		;	 being preserved across this call.)
put_file_data:
	push ax
	push bx
	push dx
	mov dx, si			; ds:dx -> data for the write call
	mov ah, 40h
%if _DATEXE
	push ds
	mov bx, DATA
	mov ds, bx			; ds => segment holding handle2
%endif
	mov bx, [handle2]
%if _DATEXE
	pop ds				; ds:dx -> caller's data again
%endif
	jcxz @F				; a zero-length write would truncate -->
	int 21h
%if _DATEXE
	jc putbuf.errorwrite
	cmp ax, cx
	jne putbuf.shortwrite
%else
	jc error
	cmp ax, cx
	jne error
%endif
@@:
	pop dx
	pop bx
	pop ax
	retn
%endif


%ifn _DATEXE
	addsection DATA, align=1 follows=CODE
msg:
		; .error is displayed using int 21h function 09h, which
		;  prints up to a dollar sign. The terminator is spelled
		;  out here so that the string matches that function
		;  regardless of what layout a string macro would emit.
.error:		db "Error!",13,10,"$"
		; Name of the output file created by the test program.
.outname:	asciz "output.bin"


	addsection BSSDATA, align=16 nobits follows=DATA
	alignb 16
		; Circular buffer that receives the depacked data.
resultbuffer:
.size equ 4096
		resb .size
.end:

%else
	usesection DATA
%endif
	alignb 2
		; Input file buffer. .next -> next byte to hand out,
		;  .tail -> behind the last valid byte.
filebuffer:	resb 256
.end:
	alignb 2
.next:		resw 1
.tail:		resw 1
%if _DATEXE
		; The DATEXE program defines the handles itself.
handle equ inhandle
handle2 equ outhandle
%else
handle:		resw 1			; input file handle
handle2:	resw 1			; output file handle
%endif

%if _OUTBUFFER
		; Output file buffer. .next -> next free byte.
outbuffer:	resb 256
.end:
.next:		resw 1
%endif

%ifn _DATEXE
		; Stack area. The entry code checks sp against .top.
stack:		resb 512
.top:
%endif


	usesection CODE
%endif	; _STANDALONE

%if _MULTI
		; The multi-format depacker includes both lzexedat variants.
	overridedef LZEXEDAT, 1
	overridedef LONGLITERAL, 1
%endif

		; Depack a compressed stream into a circular buffer. All
		;  depacked data is additionally passed to put_file_data.
		;  Source bytes are obtained from get_file_byte.
		;
		; INP:	cx:dx = length of source
		;	es:di -> destination
		;	si -> behind end of destination (si > di)
		;	(al = format, 2 = heatshrink, 3 = lzexedat -4)
		;	(ah = 0FFh for switch4k on, 00h for off)
		; OUT:	NC if success
		;	CY if error,
		;	 bx = ?errordata (if _DEBUG0)
		; CHG:	ax, (bx), cx, dx, es, ds, si, di
		; STT:	UP
		; REM:	lframe, lenter, lvar, lequ, lleave, lret are macros
		;	 from lmacros3.mac. Every lvar below is followed by
		;	 the push instruction(s) that put the initial value
		;	 of that variable on the stack.
		;	If _MULTI then al = 4 selects lzexedat -4 -l.
depack:
	lframe near
	lenter
 %if _DEBUG0 || _COUNTER
	xor bx, bx
 %endif
 %if _DEBUG0
	lvar word,	errordata
	 push bx
 %endif
%if _MULTI
	mov bl, al
%endif
%if _COUNTER || _MULTI
	lvar word,	format_and_counter
	lequ ?format_and_counter + 1, counter
	lequ ?format_and_counter, format
	 push bx		; initialise counter (high byte) to zero
				; and format (low byte)
%endif
%if _DATEXE
	lvar word,	empty_and_switch4k
	lequ ?empty_and_switch4k + 1, switch4k
	 push ax		; high byte = ah = switch4k flag
%endif

	lvar dword,	original_dst
	 push es
	 push di		; -> start of circular buffer
	lvar dword,	dst
	 push es
	 push di		; -> where to store next
	lvar dword,	src_remaining
	 push cx
	 push dx		; source bytes not yet read

	mov cx, si
	sub cx, di		; same flags as cmp si, di
	jbe .error		; end must be above start -->

%if _DXTAG
	lvar word,	dst_end
	 push si		; push into [bp + ?dst_end]
%else
	mov dx, si		; dx -> behind end of circular buffer
%endif

	xor ax, ax
	rep stosb		; prepare for matches before first data byte
%if _LZEXEDAT || _MULTI
	inc ax			; = 1
%endif
%if _MULTI
		; Three words shared between the formats. They are
		;  pushed by whichever format's init code runs.
	lvar word,	word1
	lvar word,	word2
	lvar word,	word3

	cmp byte [bp + ?format], 2
	jb .error		; format below 2 is invalid -->
	sub byte [bp + ?format], 3
		; -1 = heatshrink, 0 = lzexedat -4, 1 = lzexedat -4 -l
	jb .hs_init
	cmp byte [bp + ?format], al	; (al = 1)
	ja .error		; format above 4 is invalid -->
%endif
%if _LZEXEDAT
		; lzexedat decoder. Entered by falling through from the
		;  depack prologue with ax = 1.
		;
		; Commands are selected by tag bits read with get_bit:
		;  1	literal byte follows
		;  00	short match, two length bits, byte displacement
		;  01	combined word holding displacement and length,
		;	 length field zero escapes to a further byte
.lze_init:
%if _MULTI
	lequ ?word1,	tagbitscycle
%else
	lvar word,	tagbitscycle
%endif
	 push ax		; = 1

%ifn _DXTAG
 %if _MULTI
	lequ ?word2,	tagword
 %else
	lvar word,	tagword
 %endif
	 push ax
%endif
	call get_bit			; ?tagbitscycle init to 1, ror sets CY
					;  (bogus result in CF is ignored)

	jmp .lze_loop

		; Literal: copy one byte (or, via .loop_literal, cx
		;  bytes) from the source to the destination.
.lze_literal:
%if _LONGLITERAL
	inc cx				; cx = 1 literal
.loop_literal:
%endif

	call load_byte
%if _LONGLITERAL
	push cx				; save remaining literals count
%endif
%if _DEPACKINLINELITERAL
%if _LONGLITERAL
	mov cx, 1
%else
	inc cx				; cx = 1
%endif
	les di, [bp + ?dst]		; -> destination
%if _DXTAG
	cmp di, [bp + ?dst_end]		; at end of circular buffer ?
%else
	cmp di, dx
%endif
	ja .error			; invalid -->
	jb @F				; no -->
	mov di, word [bp + ?original_dst]	; yes, reset
@@:
	push es
	pop ds
	mov si, di			; ds:si -> where to store
	stosb				; store it
	mov word [bp + ?dst], di	; update ?dst
	call put_file_data		; ds:si ->, length cx
%else
	push ax				; on stack: the literal
	 push ss
	 pop ds
	mov si, sp			; ds:si -> data on stack
%if _LONGLITERAL
	xor ax, ax
%else
	xchg ax, cx			; ax = 0
%endif
	inc ax				; length = 1
	mov bx, ax			; bx NZ
	call copy_data.bx
	pop ax				; (discard)
d0	mov byte [bp + ?errordata], 75h
	jc .error
%endif

%if _LONGLITERAL
	pop cx
	loop .loop_literal		; more literals to copy -->
%endif

%assign LENGTHOFLITERAL $ - .lze_literal
%warning length of lze literal = LENGTHOFLITERAL

		; Main loop of the lzexedat decoder.
.lze_loop:
	xor cx, cx			; ! ch = 0, cx = 0
	call get_bit
	jc .lze_literal

.lze_notliteral:
%if _COUNTER
		; Progress display: one dot every _COUNTER matches.
	inc byte [bp + ?counter]
	test byte [bp + ?counter], _COUNTER - 1
	jnz @F
	mov al, '.'
	call disp_al_counter
@@:
%endif

	call get_bit
	jc .lze_combined

.lze_shortmatch:
		; cx = 0
	call get_bit
	rcl cx, 1
	call get_bit
	rcl cx, 1			; cx = 0..3

	call load_byte			; load displacement

	mov ah, -1			; sign extend for negative number
	xchg bx, ax			; bx = displacement (-1 to -256)

.lze_match_cx_plus_2:
	inc cx
.lze_match_cx_plus_1:
	inc cx

		; INP:	bx = match displacement (-1 to -4096)
		;	cx = match count (2 to 256)
	xchg ax, bx			; ax = displacement, cx = count
	xchg ax, cx			; cx = displacement, ax = length of match
	neg cx				; cx = index
	jmp .lze_match
.lze_loop_j1: equ .lze_loop

.lze_combined:
	call load_word			; load combined word

	mov bx, ax
	stc				; CY
	rcr bh, 1			; rotate in a 1
	sar bh, 1
	sar bh, 1			; fill top three bits with 1s
%if _DATEXE
	rol byte [bp + ?switch4k], 1	; CY if switch4k on
	jnc .8k
.4k:
	sar bh, 1			; fill top *four* bits with 1s
	and ah, 15
	jmp @F
.8k:
	and ah, 7
@@:
%else
 %if 1	; _4
	sar bh, 1			; fill top *four* bits with 1s
	and ah, 15
 %else
	and ah, 7
 %endif
%endif
		; The Zero Flag tested below is still that of the
		;  and ah instruction, mov and jmp do not modify flags.
	mov cl, ah			; ch already = 0
	jnz .lze_match_cx_plus_2	; length = cx + 2, bx = displacement

		; Length field is zero: a further byte selects
		;  0 = end of stream, 1 = special command,
		;  otherwise the match length less one.
.lze_escape:
	call load_byte			; ah = 0
	cmp al, 1
	jb .end_check
	xchg cx, ax			; cx = length
	ja .lze_match_cx_plus_1		; length = cx + 1, bx = displacement

%if _LONGLITERAL
 %if _MULTI
	cmp byte [bp + ?format], 1
	jne .lze_normalise		; not lzexedat -4 -l -->
 %endif
	xchg cx, bx			; cx = displacement, top 4 or 3 bits sets
 %if _DATEXE
	and ch, ~ 1110_0000b		; isolate 13-bit displacement
	rol byte [bp + ?switch4k], 1
	jnc @F
	and ch, ~ 1111_0000b		; isolate 12-bit displacement
@@:
 %else
  %if 1 ; _4
	and ch, ~ 1111_0000b		; isolate 12-bit displacement
  %else
	and ch, ~ 1110_0000b		; isolate 13-bit displacement
  %endif
 %endif
 	jcxz .lze_normalise		; displacement 0 means segment change -->
 	cmp cx, 26			; long literals command needs >= 26 bytes
 	jb .error			; (reserved for future expansion)
 	jmp .loop_literal		; process the literals -->
%endif
		; Segment change command: nothing to do here, continue
		;  with the next command.
.lze_normalise:
	jmp .lze_loop_j1

%if !_MULTI
		; End of stream command: succeed only if the entire
		;  source has been consumed.
.end_check:
d0	mov byte [bp + ?errordata], 7Dh
	xor cx, cx
	cmp word [bp + ?src_remaining + 2], cx
	jne .error
	cmp word [bp + ?src_remaining], cx
	jne .error

.end:
		; The following byte turns the stc into the immediate
		;  operand of a test instruction, which gives NC.
	db __TEST_IMM8			; (NC)
.error:
	stc

%if _COUNTER
		; Display a linebreak, preserving the Carry Flag in ah.
	lahf
	mov al, 13
	call disp_al_for_progress
	mov al, 10
	call disp_al_for_progress
	sahf
%endif
d0	mov bx, word [bp + ?errordata]
	lleave code
	lret
%endif
%endif

%if _MULTI || !_LZEXEDAT
		; heatshrink decoder.
		;
		; The stream as read here starts with two bytes: the
		;  window size bits (1 to 15, window must not exceed
		;  resultbuffer.size) and the lookahead size bits (1 to
		;  less than the window size bits). After that, each
		;  command starts with one tag bit: 1 = an 8-bit literal
		;  follows, 0 = an index and a count follow, each stored
		;  less one.
.hs_init:
@@:
	call read_byte
d0	mov byte [bp + ?errordata], 70h
	jc .error

	mov ah, 0
	test ax, ax
d0	mov byte [bp + ?errordata], 71h
	jz .error
	cmp al, 15
	ja .error

%if _MULTI
	lequ ?word1,	window_size_bits
%else
	lvar word,	window_size_bits
%endif
	 push ax

	xchg cx, ax
	mov ax, 1
	shl ax, cl			; ax = window size in bytes
	cmp ax, resultbuffer.size
	ja .error			; window does not fit buffer -->

	call read_byte
d0	mov byte [bp + ?errordata], 72h
	jc .error

	mov ah, 0
	test ax, ax
d0	mov byte [bp + ?errordata], 73h
	jz .error
	cmp ax, word [bp + ?window_size_bits]
	jae .error

%if _MULTI
	lequ ?word2,	lookahead_size_bits
%else
	lvar word,	lookahead_size_bits
%endif
	 push ax

%if _DXTAG
	xor dx, dx			; dl = bit index, dh = current byte
%else
	xor cx, cx
 %if _MULTI
	lequ ?word3,	low_bit_index_and_high_current_byte
 %else
	lvar word,	low_bit_index_and_high_current_byte
 %endif
	 push cx
%endif

		; Main loop of the heatshrink decoder.
.loop:
%if _COUNTER
		; Progress display: one dot every _COUNTER commands.
	inc byte [bp + ?counter]
	test byte [bp + ?counter], _COUNTER - 1
	jnz @F
	mov al, '.'
	call disp_al
@@:
%endif

	mov cx, 1
	call get_bits			; read the tag bit
	jnc .notend			; (cx = 0 if jumping)

		; Source ran out. This is the regular end of the stream
		;  if nothing remains of the source.
d0	mov byte [bp + ?errordata], 7Dh
.end_check:
	xor ax, ax
	cmp word [bp + ?src_remaining], ax
	jne .error
	cmp word [bp + ?src_remaining + 2], ax
	jne .error

		; The following byte turns the stc into the immediate
		;  operand of a test instruction, which gives NC.
	db __TEST_IMM8			; (NC)
.error:
	stc

%if _COUNTER
		; Display a linebreak, preserving the Carry Flag in ah.
	lahf
	mov al, 13
	call disp_al
	mov al, 10
	call disp_al
	sahf
%endif
d0	mov bx, word [bp + ?errordata]
	lleave code
	lret

.notend:				; (cx = 0)
	test al, al
	jz .notliteral

.hs_literal:
	mov cl, 8
	call get_bits			; cx = 0
d0	mov byte [bp + ?errordata], 74h
	jc .end_check
%if _DEPACKINLINELITERAL
	inc cx				; cx = 1
	les di, [bp + ?dst]		; -> destination
%if _DXTAG
	cmp di, [bp + ?dst_end]		; at end of circular buffer ?
%else
	cmp di, dx
%endif
	ja .error			; invalid -->
	jb @F				; no -->
	mov di, word [bp + ?original_dst]	; yes, reset
@@:
	push es
	pop ds
	mov si, di			; ds:si -> where to store
	stosb				; store it
	mov word [bp + ?dst], di	; update ?dst
	call put_file_data		; ds:si ->, length cx
%else
	push ax				; on stack: the literal
	 push ss
	 pop ds
	mov si, sp			; ds:si -> data on stack
	xchg ax, cx			; ax = 0
	inc ax				; length = 1
	mov bx, ax			; bx NZ
	call copy_data.bx
	pop ax				; (discard)
d0	mov byte [bp + ?errordata], 75h
	jc .error
%endif
%assign LENGTHOFLITERAL $ - .hs_literal
%warning length of hs literal = LENGTHOFLITERAL
	jmp .loop

.notliteral:				; (cx = 0)

	mov cl, byte [bp + ?window_size_bits]
					; cx = -w parameter
	call get_bits
d0	mov byte [bp + ?errordata], 76h
	jc .end_check
		; ax = output index
		; cx = 0
	xchg bx, ax
	mov cl, byte [bp + ?lookahead_size_bits]
					; cx = -l parameter
	call get_bits
d0	mov byte [bp + ?errordata], 77h
	jc .end_check
		; bx = output index less 1
		; ax = output count less 1
	inc bx				; = output index
d0	mov byte [bp + ?errordata], 78h
	jz .error			; index wrapped around to zero -->

					; ax = length of match less one
	inc ax				; = length of match
	jz .error			; length wrapped around to zero -->

	mov cx, bx			; 1 .. 65535
%endif

		; Common match handling of both decoders. The heatshrink
		;  decoder falls through to here, the lzexedat decoder
		;  jumps to .lze_match.
		;
		; The match source is cx bytes before ?dst within the
		;  circular buffer. If that is before the start of the
		;  buffer then the source starts near the end of the
		;  buffer instead and may have to be copied in two
		;  parts, the second one from the start of the buffer.
%if _LZEXEDAT
.lze_match:
%endif

		; cx = index, ax = length
	lds si, [bp + ?dst]
	mov bx, word [bp + ?original_dst]
	neg bx
	add bx, si			; dst - original = length at start
	sub bx, cx			; length start - match distance
	jae @F				; length start >= match distance -->
		; bx = negative length of matchable at end of buffer

%if _DXTAG
	mov si, [bp + ?dst_end]
%else
	mov si, dx
%endif
	add si, bx			; si -> match data
	mov bx, si
	add bx, ax			; -> behind match data (if fits)
	jc .full			; if doesn't fit, copy full -->
%if _DXTAG
	cmp bx, [bp + ?dst_end]		; -> behind is above-or-equal end ?
%else
	cmp bx, dx
%endif
	; jae .full			; yes -->
	jb .copy_data			; ax has full length from tail of buffer -->

		; if we branch to here,
		;  si + ax >= (word [bp + ?dst_end] or dx)
		;  therefore  ax >= (word [bp + ?dst_end] or dx) - si
.full:
	push ax				; save original length
		; calculate word [bp + ?dst_end] - si
%if _DXTAG
	mov ax, [bp + ?dst_end]
%else
	mov ax, dx
%endif
	sub ax, si			; -> end minus -> match data
	push ax				; save length
	call copy_data
d0	mov byte [bp + ?errordata], 1
	jc .error
	pop bx				; = done length
	pop ax				; = entire length
	sub ax, bx			; entire length minus done length
	mov si, word [bp + ?original_dst]	; -> start of buffer
		; The following byte makes the sub si, cx instruction
		;  the immediate operand of a test instruction so that
		;  it is not executed when arriving from above.
	db __TEST_IMM16			; skip sub
@@:
	sub si, cx			; -> into buffer

.copy_data:
	call copy_data			; give ?dst -> dest, ds:si -> source
		; returns: ?dst incremented, ds:si -> after match source
d0	mov byte [bp + ?errordata], 7Bh
	jc .error

%if _MULTI
		; end of common code, dispatch back to main loop
	cmp byte [bp + ?format], 1
	je @F				; lzexedat -4 -l -->
	rol byte [bp + ?format], 1	; CY if -1 = heatshrink
	jc .loop
@@:
	jmp .lze_loop
%elif _LZEXEDAT
	jmp .lze_loop
%else
	jmp .loop
%endif


		; Copy data to ?dst within the circular buffer, wrapping
		;  around the end of the buffer as needed. Each part that
		;  is stored is also passed to put_file_data.
		;
		; INP:	?dst -> destination,
		;	 destination may overlap the circular buffer boundary.
		;	ds:si -> source
		;	ax = how long the data is (0 is valid),
		;	 always 1 for literals.
		;	 source must fit in circular buffer, except if
		;	 source and destination overlap. in this case the
		;	 source starts in the buffer but may extend past
		;	 the circular buffer boundary.
		;	word [?dst_end] or dx -> end of circular buffer
		;	word [?original_dst] -> start of circular buffer
		; OUT:	?dst incremented
		;	CY if error (buffer too small)
		;	NC if success
		; CHG:	cx, bx, ax, es, di, ds, si
		; REM:	This function uses the stack frame of depack
		;	 (bp must be unchanged from within depack).
copy_data:
%ifn _DEPACKINLINELITERAL
	xor bx, bx
.bx:
		; bx = nonzero if literal
%endif

		; (disabled debugging aid: query the current position
		;  of the output file)
%if 0
	push ax
	push bx
	push cx
	push dx
	mov ax, 4201h
	mov cx, 0
	mov dx, 0
	mov bx, word [handle2]
	int 21h
	nop
	pop dx
	pop cx
	pop bx
	pop ax
%endif

d0	inc byte [bp + ?errordata + 1]

	les di, [bp + ?dst]

	xchg cx, ax			; cx = remaining length
	jcxz .end

%if _DXTAG
	cmp di, [bp + ?dst_end]
%else
	cmp di, dx
%endif
	ja .ret_CY			; error
		; literals may branch to .start_maybe_part
	je .start_maybe_part		; ?dst -> at end (put whole data at start)
	add di, cx			; -> behind data chunk (if fits)
	jc .fill			; doesn't fit -->
%if _DXTAG
	cmp di, [bp + ?dst_end]		; fits ?
%else
	cmp di, dx
%endif
		; literals never branch to .fill
	ja .fill			; doesn't fit -->
	mov di, word [bp + ?dst]	; restore -> destination
		; ds:si l cx fits in circular buffer
	jmp .mov			; store full up to below-or-equal -> end

		; Destination crosses the end of the buffer: first
		;  store as much as fits at the end.
.fill:
	mov di, word [bp + ?dst]	; restore -> destination
%if _DXTAG
	mov ax, [bp + ?dst_end]		; -> behind buffer
%else
	mov ax, dx
%endif
	sub ax, di			; -> behind buffer minus -> destination
					; = length fitting at end
	sub cx, ax			; cx -= length fitting at end
					; = remaining length to store at start
	xchg cx, ax			; cx = length fitting at end
	call .mov_and_put		; store at end
	xchg cx, ax			; cx = remaining length to store at start

.start_maybe_part:
	mov di, word [bp + ?original_dst]
%ifn _DEPACKINLINELITERAL
	test bx, bx			; literal ?
	jnz .mov			; yes -->
%endif
		; literals never go here
		; the following handles source overlapping destination
		;  s.t. source crosses the circular buffer boundary.
	mov ax, si
	add ax, cx			; -> behind source
	jc .part			; CY, must copy part -->
%if _DXTAG
	cmp ax, [bp + ?dst_end]		; below-or-equal end of buffer ?
%else
	cmp ax, dx
%endif
	jbe .mov			; yes -->
.part:
%if _DXTAG
	mov ax, [bp + ?dst_end]
%else
	mov ax, dx
%endif
	sub ax, si			; ax = how much source data remains
	sub cx, ax			; cx = how much to copy after
	xchg cx, ax			; cx = part length (source data remains)
	call .mov_and_put		; put from end
	xchg cx, ax			; cx = how much to copy from buffer start
	mov si, word [bp + ?original_dst]	; -> start of buffer
					; continue with di -> near start of buffer
.mov:
	call .mov_and_put

.end:
	mov word [bp + ?dst], di
	clc
.ret:
	retn

.ret_CY:
	stc
	retn

		; Copy one contiguous part and pass it to put_file_data.
		;
		; INP:	ds:si -> source
		;	es:di -> destination
		;	cx = length
		; OUT:	si = si + cx
		;	di = di + cx
		; CHG:	-
		; REM:	put_file_data is called with ds:si -> the part
		;	 as stored in the destination, length cx. This
		;	 depends on put_file_data preserving di.
.mov_and_put:
	 push ds
	 push si
	push es
	push di
	push cx
	rep movsb
	pop cx
	pop si				; = destination offset before copy
	pop ds				; = destination segment
	call put_file_data
	 pop si
	 pop ds				; ds:si -> source again
	add si, cx			; -> behind source part
	retn


%if _LZEXEDAT
		; Read the next tag bit of the lzexedat stream. Tag bits
		;  are taken from 16-bit tag words, lowest bit first.
		;
		; INP:	?src_remaining
		;	dx / word [?tagword] = tag word
		;	word [?tagbitscycle] = circular tag bits indicator
		;	 (there's always a single bit set in this word.
		;	 if this word is equal to 1 on INP then the tag
		;	 word must be reloaded, and the last bit shifted
		;	 out of the INP prior tag word.)
		; OUT:	NC if read a zero,
		;	CY if read a one,
		;	 ?src_remaining decremented
		;	 dx / word [?tagword] updated
		;	 word [?tagbitscycle] updated
		;	branches to depack.error on error
		; CHG:	ds, si, ax
		; REM:	The result is the Carry Flag left by the shr
		;	 instruction. xchg, mov, and retn do not modify
		;	 the flags.
get_bit:
	ror word [bp + ?tagbitscycle], 1
	jnc .left
		; after this branch not taken, the cycle word
		;  wraps around to be equal to 8000h. this
		;  will wrap around again after 15 more ror.
	call load_word			; ax = next tag word
%if _DXTAG
	shr dx, 1			; CF = last bit of prior tag word
	xchg dx, ax			; dx = new tag word
	retn

.left:
	shr dx, 1
	retn
%else
	shr word [bp + ?tagword], 1	; CF = last bit of prior tag word
	mov word [bp + ?tagword], ax	; store new tag word
	retn

.left:
	shr word [bp + ?tagword], 1
	retn
%endif

		; lodsw with ?src_remaining check
		;
		; OUT:	ax = word read, low byte read first
		;	branches to depack.error on error
		; CHG:	ds, si
load_word:
	call load_byte
	xchg al, ah			; ah = first byte
	call load_byte
	xchg al, ah			; al = first byte, ah = second byte
	retn

		; lodsb with ?src_remaining check
		;
		; OUT:	al = byte read
		;	branches to depack.error on error
		; CHG:	ds, si
		; REM:	The branch to depack.error leaves the return
		;	 addresses on the stack. The error exit of depack
		;	 leaves the frame using lleave, so this relies on
		;	 lleave resetting sp from bp.
load_byte:
	call read_byte
d0	mov byte [bp + ?errordata], 84h
	jc depack.error
	retn
%endif

%if _MULTI || !_LZEXEDAT
		; Read a number of bits of the heatshrink stream. Bits
		;  are taken from each source byte highest bit first, and
		;  the first bit read ends up as the highest bit of the
		;  value returned.
		;
		; INP:	cx = 0..15
		;	dl = bit index, dh = current byte
		; OUT:	NC if successful,
		;	 ax = value read
		;	 cx = 0
		;	 dx updated
		;	CY if error
		; CHG:	ds, si
		; REM:	cx = 0 on input is rejected as an error.
		;	dl is a mask with a single bit set that selects
		;	 the next bit of dh, or zero if the next byte must
		;	 be read from the source first.
		;	If not _DXTAG then the bit index and current byte
		;	 are kept in the stack frame and dx is preserved.
get_bits:
%ifn _DXTAG
	push dx
	mov dx, word [bp + ?low_bit_index_and_high_current_byte]
%endif
	cmp cx, 15
	ja .error
	test cx, cx
	jz .error
	xor ax, ax
.loop:
	test dl, dl			; any bits left in current byte ?
	jnz .havebit			; yes -->
	push ax
	call read_byte
	mov dh, al
	mov dl, 80h
	pop ax
	jc .error			; (CF still that of read_byte)
.havebit:
	shl ax, 1
	test dh, dl
	jz @F
	inc ax
@@:
	shr dl, 1
	loop .loop
.end:
	db __TEST_IMM8		; skip stc, NC
.error:
	stc
%ifn _DXTAG
	mov word [bp + ?low_bit_index_and_high_current_byte], dx
	pop dx
%endif
	retn
%endif


		; Fetch one source byte, counting it against the dword
		;  ?src_remaining in the stack frame of depack.
		;
		; INP:	?src_remaining
		; OUT:	NC if success,
		;	 al = value read
		;	 ?src_remaining decremented
		;	CY if error (source buffer too small),
		;	 ?src_remaining = 0
		; CHG:	ds, si
		; REM:	On success the result, including the Carry Flag,
		;	 is that of get_file_byte, which is entered with
		;	 a jump so that it returns to our caller directly.
read_byte:
	sub word [bp + ?src_remaining], 1
	sbb word [bp + ?src_remaining + 2], 0
	jnc .fetch			; count did not underflow -->

		; The count was zero already. Undo the underflow so
		;  that the count reads as zero again.
	and word [bp + ?src_remaining + 2], 0
	and word [bp + ?src_remaining], 0
	stc
	retn

.fetch:
	jmp get_file_byte

		; End of the code that uses the stack frame variables
		;  of depack (the ?names declared using lvar and lequ).
	lleave ctx

%if _MULTI
		; Undo the overridedef done ahead of depack.
	resetdef LONGLITERAL
	resetdef LZEXEDAT
%endif

%ifn _STANDALONE
		; Undo the overridedef done at the top of the file.
	resetdef COUNTER
	resetdef DEBUG0


		; Read part of the depacked data of a compressed file.
		;
		; The depacker runs on a stack of its own
		;  (depack_stack). put_file_data suspends it as soon as
		;  the request has been satisfied, remembering the stack
		;  pointer in depack_saved_sp. A subsequent call resumes
		;  the suspended depacker if the wanted position
		;  (depackskip) is not below the current position
		;  (depackseek). Otherwise depacking starts over.
		;
		; INP:	bx = file handle
		;	es:dx -> buffer that receives the depacked data
		;	cx = how many bytes are requested
		;	dword [depackskip] = how much depacked data to skip
		;	 counted from the start of the depacked data
		;	dword [libseek] = seek of compressed data in file
		;	dword [libtab_compressed_length] = its length
		; OUT:	NC if depacker ran to the end of the stream,
		;	 ax = how many bytes were stored
		;	NC if request satisfied and depacker suspended,
		;	 ax = not set up (NOTE(review): confirm whether
		;	 callers use ax in this case)
		;	CY if error
		;	dword [depackskip] = dword [depackseek]
		;	 (not if the initial seek failed)
		; CHG:	ax, flags. All other registers are restored.
		; REM:	reloc, reloc2, internaldatarelocation, houdini
		;	 are macros defined outside this file.
		;	This assumes ds = ss on entry. TODO confirm
read_and_depack:
	houdini

		; common setup for depacking
reloc	mov word [handle], bx
internaldatarelocation

reloc	mov word [depacked_buffer], dx
internaldatarelocation
reloc	mov word [depacked_buffer + 2], es
internaldatarelocation
reloc	mov word [depacked_length], cx
internaldatarelocation

%ifndef ELD_OMIT_PARTIALDEPACK
reloc	rol byte [partialdepack], 1
internaldatarelocation
	jnc .new
%endif
		; did we save a prior partial depack ?
reloc	cmp word [depack_saved_sp], strict byte 0FFFFh
internaldatarelocation -3
	je .new				; no -->
		; compare dword seek to dword skip, high words first
reloc	mov ax, word [depackseek + 2]
internaldatarelocation
reloc	cmp ax, word [depackskip + 2]
internaldatarelocation
	jne @F
reloc	mov ax, word [depackseek]
internaldatarelocation
reloc	cmp ax, word [depackskip]
internaldatarelocation
@@:
	ja .new				; seek is > skip, must rewind -->

reloc	mov ax, word [depackseek]
internaldatarelocation
reloc	sub word [depackskip], ax
internaldatarelocation
reloc	mov ax, word [depackseek + 2]
internaldatarelocation
reloc	sbb word [depackskip + 2], ax	; skip -= seek
internaldatarelocation

	push dx
	push bx
	push di
	push si
	push es
	push bp
	push cx				; stack frame must match what .read expects

reloc	mov word [depack_prior_sp], sp
internaldatarelocation			; store our sp
	jmp put_file_data.entry		; continue depacking


		; Start depacking from the beginning of the stream.
.new:
	push dx
	push cx
	push es
reloc	mov dx, word [libseek]
internaldatarelocation
reloc	mov cx, word [libseek + 2]
internaldatarelocation
	mov ax, 4200h			; seek to start of compressed data
	call dos_or_boot_io
	pop es
	jc .io_error

		; mark the input buffer as empty
reloc2	mov word [filebuffer.next], filebuffer
internaldatarelocation -4
internaldatarelocation
reloc2	mov word [filebuffer.tail], filebuffer
internaldatarelocation -4
internaldatarelocation

	pop cx
	pop dx

	xor ax, ax
reloc	mov word [depackseek], ax
internaldatarelocation
reloc	mov word [depackseek + 2], ax
internaldatarelocation

		; same stack frame as for resuming, refer to above
	push dx
	push bx
	push di
	push si
	push es
	push bp
	push cx
	 push ss
	 pop es
reloc	mov di, resultbuffer
internaldatarelocation
reloc	mov si, resultbuffer.end
internaldatarelocation

reloc	mov cx, word [libtab_compressed_length + 2]
internaldatarelocation
reloc	mov dx, word [libtab_compressed_length]
internaldatarelocation

reloc	mov word [depack_prior_sp], sp
internaldatarelocation
reloc	mov sp, depack_stack.top	; switch to the depacker's stack
internaldatarelocation
%if _MULTI
reloc	mov al, byte [depack_format]
internaldatarelocation
%endif
	call depack
		; depack returned, so there is nothing to resume. None
		;  of the instructions up to the jc below modify the
		;  Carry Flag returned by depack.
reloc	mov word [depack_saved_sp], 0FFFFh
internaldatarelocation -4
		; put_file_data jumps to here (with NC) when it
		;  suspends the depacker.
.read:
	 push ss
	 pop ds

		; skip = seek
reloc	push word [depackseek]
internaldatarelocation
reloc	push word [depackseek + 2]
internaldatarelocation
reloc	pop word [depackskip + 2]
internaldatarelocation
reloc	pop word [depackskip]
internaldatarelocation

reloc	mov sp, word [depack_prior_sp]	; back to the caller's stack
internaldatarelocation
	jc @F
reloc	mov ax, word [depacked_length]
internaldatarelocation
	neg ax			; - remain
	pop cx
	add ax, cx		; requested - remain = how much read
	db __TEST_IMM8		; skip pop, NC
@@:
	pop cx
	pop bp
	pop es
	pop si
	pop di
	pop bx
	pop dx
	retn

.io_error:
	add sp, 4		; discard cx and dx pushed at .new
	stc
	retn


		; Get the next byte of the compressed data, refilling
		;  the file buffer using dos_or_boot_io when it is
		;  exhausted.
		;
		; INP:	word [ss:handle] = file handle
		; OUT:	NC if success,
		;	 al = byte read from file
		;	CY if error or if nothing was read
		; CHG:	si
		; REM:	All variables are accessed in the segment of ss,
		;	 ds and es are preserved.
get_file_byte:
reloc	mov si, [ss:filebuffer.next]
internaldatarelocation
reloc	cmp si, [ss:filebuffer.tail]	; any buffered data left ?
internaldatarelocation
	jb .buffered			; yes -->

	push ax
	push bx
	push cx
	push dx
	push es
	push ds

	 push ss
	 pop ds
	 push ss
	 pop es
reloc	mov dx, filebuffer
internaldatarelocation
reloc	mov [filebuffer.next], dx
internaldatarelocation
reloc	mov [filebuffer.tail], dx
internaldatarelocation
	mov ax, 3F00h
reloc	mov bx, [handle]
internaldatarelocation
	mov cx, filebuffer.end - filebuffer
	call dos_or_boot_io
	jc @F			; CY -->
	test ax, ax
	stc			; CY
	jz @F
	add ax, dx		; NC
reloc	mov word [filebuffer.tail], ax
internaldatarelocation
	mov si, dx
@@:
		; the pop instructions do not modify the Carry Flag
	pop ds
	pop es
	pop dx
	pop cx
	pop bx
	pop ax
	jc .ret
.buffered:
	ss lodsb
reloc	mov [ss:filebuffer.next], si
internaldatarelocation
	clc
.ret:
	retn


		; Receive depacked data from the depacker. Data is
		;  skipped until depackskip is used up, after that it is
		;  copied to the user buffer until the request recorded
		;  by read_and_depack is satisfied.
		;
		; INP:	si -> data to write, in the segment of ss
		;	 (ds is replaced by ss on entry)
		;	cx = length of data (may be zero)
		;	dword [depackskip] = how much data to skip yet
		;	dword [depacked_buffer] -> user buffer
		;	word [depacked_length] = how much is wanted yet
		; OUT:	dword [depackseek] incremented by amount processed
		;	If the request has been satisfied, this does not
		;	 return for now. The state is saved and execution
		;	 continues at read_and_depack.read, on the
		;	 caller's stack. A subsequent read_and_depack
		;	 call may resume here by jumping to .entry.
		; CHG:	-
put_file_data:
	push ax
	push bx
	push cx
	push dx
	push ds
	push si
	push es
	push di
	 push ss
	 pop ds
.entry2:
		; (disabled debugging aid)
%if 0
reloc	cmp word [depackseek], strict word 0AFh
internaldatarelocation -4
	jne @F
reloc	cmp word [depackseek + 2], strict byte 0
internaldatarelocation -3
	jne @F
	houdini
@@:
%endif
	push cx			; put data length, add to depackseek maybe
reloc	cmp word [depackskip + 2], strict byte 0
internaldatarelocation -3
	jne .skip
reloc	cmp word [depackskip], cx
internaldatarelocation
	jae .skip		; still have to skip -->
	xor dx, dx
reloc	xchg dx, word [depackskip]
internaldatarelocation		; how much to skip in this step
				; dx = how many bytes skipped
	add si, dx		; -> data to copy (part of put data)
	sub cx, dx		; data - skip = length of data to copy, at least 1
	jz .ret_cx		; (not needed ?)
	push cx			; = how many bytes left at si
reloc	les di, [depacked_buffer]
internaldatarelocation
reloc	mov ax, word [depacked_length]
internaldatarelocation		; how much we want to read
	cmp ax, cx
	jae @F
	mov cx, ax		; cx = min(ax, cx) lesser of requested, available
@@:
	mov ax, cx		; how much we want to copy
	rep movsb		; copy to user buffer, si -> data left if any

reloc	mov word [depacked_buffer], di
internaldatarelocation		; -> past copied data
reloc	sub word [depacked_length], ax
internaldatarelocation		; decrement count of how much still wanted
		; this is NZ only if available < requested
		; (pop does not modify the flags of the sub)
	pop cx			; = how many bytes left at prior si -> data just copied
	jnz .ret_cx		; go add full length of put data to depackseek
	add dx, ax		; how many bytes processed (skipped + copied)
reloc	add word [depackseek], dx
internaldatarelocation
reloc	adc word [depackseek + 2], strict byte 0
internaldatarelocation -3	; add to seek the last bit processed
	pop dx			; discard cx on stack
	sub cx, ax		; = how many bytes left after new si ->
reloc	mov word [depack_left_length], cx
internaldatarelocation		; = amount data left after new si ->
reloc	mov word [depack_left_address], si
internaldatarelocation		; at current depackseek -> data left if any
		; Suspend the depacker. bp and the registers pushed on
		;  entry stay on the depacker's stack.
	push bp
reloc	mov word [depack_saved_sp], sp
internaldatarelocation
	clc
	jmp read_and_depack.read

		; Resume a suspended depacker, entered by a jump from
		;  read_and_depack. Processing continues with the data
		;  that was left over when the depacker was suspended.
.entry:
reloc	mov sp, word [depack_saved_sp]
internaldatarelocation
	pop bp
reloc	mov cx, word [depack_left_length]
internaldatarelocation
reloc	mov si, word [depack_left_address]
internaldatarelocation
	jmp .entry2

		; All of this data is to be skipped.
.skip:
reloc	sub word [depackskip], cx
internaldatarelocation
reloc	sbb word [depackskip + 2], strict byte 0
internaldatarelocation -3

.ret_cx:
	pop cx			; = put data length
reloc	add word [depackseek], cx
internaldatarelocation
reloc	adc word [depackseek + 2], strict byte 0
internaldatarelocation -3

.ret:
	pop di
	pop es
	pop si
	pop ds
	pop dx
	pop cx
	pop bx
	pop ax
	retn
%endif
