; sp3.xm: third part of SPelling error detector/corrector Unicum
; /AJK 13.Sep.82, 24.Oct.82

;    _______
;   |      /
;   |     /
;   |    /    Copyright (c) 1982 by Knowlogy
;   |   //\                         PO Box 283
;   |  //  \                        Wilsonville, Oregon  97070
;   | //    \
;   |//______\


; The sp Unicum is made up of three files:
;   sp.cif -- global definitions
;   sp1.xm -- main program
;   sp2.xm -- correction dialog
;   sp3.xm -- support routines and data
; To re-create the executable program image SP.COM, do the following:
;   xm80 =sp1
;   xm80 =sp2
;   xm80 =sp3
;   l80 sp1,sp2,sp3,lib/s,sp/n/e

	provides SP

	uses LIB2800		; Z80 components
	uses LIB2801		; CP/M components


; AuxWord procedure: get a word from the auxiliary or jargon dictionary.
; The word is read from the file open on CHANNEL.
; The word is stored as a null-terminated string at WORD.
; Any replacement word is stored as a null-terminated string at REPL.
; FLAGS is set to one of:
;   (1 SHL STgood)			; if the word is spelled correctly
;   (1 SHL STbad)			; if the word is spelled incorrectly
;   (1 SHL STbad)+(1 SHL STrepl) 	; if there's a replacement
;   (1 SHL STbad)+(1 SHL STrepl)+AUTOr  ; if replacement is auto-replace
; Carry is set if EOF occurs (no word), else clear.
	proc AuxWord [CHANNEL:a,WORD:hl,REPL:de]->[FLAGS:a]+C
	begin
	push	bc
	push	de
	push	hl

; Get next line from the channel.
	ld	c,a
	ld	b,0
	LIOgl [stk=bc,stk=hl,stk=WrdSiz]->[]+C
	jr	c,x5			; end-of-file

; Keep track of FLAGS in C
	ld	c,1 SHL STgood		; assume word is good

; Search word for exclamation point.
x1:
	ld	a,(hl)
	and	a			; check for terminator
	jr	z,x4			; if so, no '!'
	cp	'!'			; see if it was '!'
	jr	z,x2			; branch if so
	inc	hl			; step past character
	jr	x1			; examine next character

; Here with an exclamation point.
x2:
	ld	c,1 SHL STbad		; mark word bad
	ld	(hl),0			; null-terminate the word
	inc	hl			; step to character after '!'

; See if it's a double exclamation point ("!!"), meaning auto-replace
	ld	a,(hl)
	cp	'!'
	jr	nz,x3
	inc	hl			; it is
	ld	a,(rflg)		; see if we're allowed to auto-replace
	and	a
	jr	nz,x3			; branch if we're not
	ld	a,(hl)			; see if there's anything to replace
	and	a
	jr	z,x4			; if not, don't set AUTOr
	ld	c,AUTOr+(1 SHL STbad)	; set AUTOr
x3:					; here with HL -> repl
	ld	a,(hl)			; see if there is a replacement
	and	a
	jr	z,x4			; branch if not

; Here with HL -> replacement string
; Copy it into REPL
	ex	de,hl
	ld	a,c			; preserve flags
	STRcpy [de,hl,bc=WrdSiz]
	or	1 SHL STrepl		; mark "replacement string present"
	ld	c,a

x4:
	and	a			; clear carry
	ld	a,c			; set FLAGS
x5:
	pop	hl
	pop	de
	pop	bc

	end AuxWord


; Insert procedure: insert WORD into the pointer list at WHERE.
	proc Insert [WORD:hl,WHERE:de]
	begin
	push	af
	push	bc
	push	de
	push	hl

; Make a copy of WORD in string space.
	ld	b,h			; set BC = WORD
	ld	c,l
	push	de			; save WHERE
	STRlng [hl]->[de=hl]		; DE = length of word
	ld	hl,(strbot)		; HL -> bottom of string space
	dec	hl			; HL -> terminator for new word
	push	hl			; save terminator
	and	a
	sbc	hl,de			; HL -> location of word
	ld	(strbot),hl		; that's new string space bottom
	STRcpy [de=bc,hl,bc=WrdSiz]	; copy word
	pop	hl			; HL -> word delimiter
	ld	(hl),080h		; set parity bit in delimiter
	pop	de

; Move words from WHERE through (ptrtop)-1 up by two bytes,
;	       to WHERE+2 through (ptrtop)+1.
; Then store WORD at WHERE, set (ptrtop) to (ptrtop)+2,
; and check that (ptrtop) has not encroached on (strbot).

; Load registers for an "lddr" operation.
	ld	hl,(ptrtop)
	and	a
	sbc	hl,de
	ld	b,h
	ld	c,l			; BC = number of bytes to move
					; byte count will always be nonzero
	ld	hl,(ptrtop)		; HL -> first byte beyond list
	ld	d,h
	ld	e,l
	dec	hl			; HL = initial source (ptrtop)-1
	inc	de			; DE = initial destination (ptrtop)+1
	lddr				; shift pointers up one word
	inc	hl			; now HL = WHERE
	ld	de,(strbot)		; DE -> string
	ld	(hl),e			; store WORD at WHERE
	inc	hl
	ld	(hl),d
	ld	hl,(ptrtop)		; advance (ptrtop) one word
	inc	hl
	inc	hl
	ld	(ptrtop),hl
	ld	de,(strbot)		; check for memory overflow
	sbc	hl,de
	jp	nc,nomem##

	pop	hl
	pop	de
	pop	bc
	pop	af

	end Insert


; Lookup procedure: find the position of a word in the table.
; WORD is the address of a string containing the word.
; Return with WHERE -> the word and carry clear (if found),
; or WHERE -> where to insert the word and carry set (if not found)
	proc Lookup [WORD:hl]->[WHERE:de]+C-a
	begin
	push	ix
	push	iy
	push	bc
	push	hl		; (SP) = WORD

; Binary search for the word.
; Register usage:
; IX -> lower bound of region
; IY -> upper bound of region
	ld	ix,(ptrbot)
	ld	iy,(ptrtop)

; Loop: split the region.
; If the region contains only one word, we're done splitting.
lkp1:
	push	iy
	pop	hl			; HL -> top of region
	push	ix
	pop	de			; DE -> base of region
	and	a
	sbc	hl,de			; HL = length of region
	srl	h			; divide HL by 2
	rr	l
	res	0,l			; force HL to be a multiple of 2
	ld	a,h			; see if HL has gone to zero
	or	l
	jr	z,lkp3			; branch if so, can't split region
	add	hl,de			; HL -> middle of region

; Now HL points to the middle of the region.
; Compare the word from there with the word that we're seeking.
	ld	b,h			; save region middle pointer in BC
	ld	c,l
	ld	e,(hl)			; follow pointer to string
	inc	hl
	ld	d,(hl)			; DE -> string at middle of region
	pop	hl			; set HL -> word being looked up
	push	hl
	WrdCmp [hl,de]->[]+C+Z-a	; compare words
	jr	z,lkp4			; branch if words are equal

; Here if words are different.
; Carry is set if new word < word at region mark, else clear.
; The new region is the first half of the old region if the new word is
; less, otherwise the new region is the second half of the old region.
	push	bc			; push middle region pointer
	jr	c,lkp2			; branch to use lower region
	pop	ix			; use upper region, raise region base
	jr	lkp1			; go investigate new region
lkp2:
	pop	iy			; use lower region, drop region bound
	jr	lkp1			; go investigate new region

; Here if we can't split the region.
; We've found two words between which the new word should be inserted.
lkp3:
	push	iy			; return WHERE = region bound
	pop	de
	scf				; set carry
	jr	lkp5

; Here if we've found the word.
lkp4:
	ld	d,b			; return WHERE -> word
	ld	e,c
	and	a			; clear carry

lkp5:					; return
	pop	hl
	pop	bc
	pop	iy
	pop	ix
	end Lookup


; Repl procedure: add the replacement (correctly spelled) word REPL
; to the entry in the word pointer list for the word at WHERE.
	proc Repl [WHERE:ix,REPL:de]
	begin
	push	af
	push	bc
	push	hl
	push	de			; (SP) = REPL

; Determine the length of the original (incorrectly spelled) word.
	ld	l,(ix+0)
	ld	h,(ix+1)		; HL -> word
	ld	bc,0			; begin count at 0
repl1:
	inc	hl			; step to next character
	inc	bc			; bump count
	bit	7,(hl)			; come to terminator?
	jr	z,repl1			; loop if not
	push	bc			; (SP) = lengthof original word

; Add the length of the replacement string, plus 2 for the terminators
	STRlng [hl=de]->[hl]		; HL = REPL length
	add	hl,bc			; combine with original word length
	inc	hl			; +1
	inc	hl			; +2

; Allocate a chunk of memory of that size.
	ld	b,h			; set BC = size of desired chunk
	ld	c,l
	ld	hl,(strbot)		; HL -> bottom of string space
	and	a
	sbc	hl,bc			; HL -> new chunk
	push	hl
	ld	bc,(ptrtop)		; make sure we haven't run into
	and	a			;   pointer list
	sbc	hl,bc			; set carry if we have
	jp	c,nomem##		; branch if out of memory
	pop	hl
	ld	(strbot),hl		; update string bottom pointer

; Copy in original word.
	ld	e,(ix+0)
	ld	d,(ix+1)		; DE -> original word
	pop	bc			; BC = length
	inc	bc			; add 1 for terminator
	STRcpy [de,hl,bc]		; copy word

; Replace word pointer in pointer list.
; The old word is lost to the great garbage heap in the sky.
	ld	(ix+0),l
	ld	(ix+1),h

; Place a parity-bit at word delimiter.
	dec	bc			; get back word length
	add	hl,bc			; HL -> word terminator
	ld	(hl),080h		; set parity bit
	inc	hl			; step to replacement

; Copy the replacement string.
	pop	de
	STRcpy [de,hl,bc=WrdSiz]

	pop	hl
	pop	bc
	pop	af

	end Repl


; Stat procedure:  if -t was specified, give statistics concerning
; the word storage chunk.
	proc Stat []
	begin
	push	af
	push	de
	push	hl

	ld	a,(tflg)		; was -t given?
	and	a
	jr	z,stat1			; if not, do nothing
	ld	de,(ptrbot)		; compute pointer space
	ld	hl,(ptrtop)
	sbc	hl,de			; HL = free pointer space
	TPUTF [stk="[Memory usage: pointers %u",stk=hl]
	ld	hl,(strtop)		; compute string space
	ld	de,(strbot)
	sbc	hl,de
	TPUTF [stk=" string %u",stk=hl]
	ld	hl,(strbot)		; compute free space
	ld	de,(ptrtop)
	sbc	hl,de
	TPUTF [stk=" free %u]^m^j",stk=hl]
stat1:
	pop	hl
	pop	de
	pop	af

	end Stat


; Sync procedure: synchronize in-memory word list with dictionary being read.
; WHERE is the current position in the word list.
; WORD, FLAGS, and possibly REPL correspond to the current dictionary line.
; Return with NEWWHERE updated to point to the first word in the word list
;   beyond the dictionary word, and set carry if there's no point in reading
;   the dictionary any farther (word list is exhausted), else clear carry.
	proc Sync [WHERE:ix,WORD:hl,REPL:de,FLAGS:a]->[NEWWHERE:ix]+C-a
	begin
	push	bc
	push	hl
	push	de
	ld	c,a			; C = FLAGS

; Program integrity check:
; Make sure IX hasn't gone off the end of the pointer list.
	push	ix
	ex	(sp),hl			; save old HL, set HL = IX
	push	de			; save old DE
	ld	de,(ptrtop)
	and	a
	sbc	hl,de			; set C if a gap remains
	pop	de			; restore registers
	pop	hl
	jr	c,sync1
	EPUTF [stk="? sp internal error - sync bounds exceeded^m^j"]
	SHLexi [a=0FFh]

; While the current word in the word list is less than the dictionary word:
;   advance to the next word in the word list.
sync1:
	ld	e,(ix+0)		; set DE = current word list word
	ld	d,(ix+1)
	WrdCmp [hl,de]->[]+C+Z-a	; compare
	jr	c,sync5			; branch if need next dictionary word
	jr	z,sync2			; branch if words are equal
	inc	ix			; step to next word list word
	inc	ix
	push	ix			; see if that's end of word list
	ex	(sp),hl			; (preserve HL)
	and	a
	ld	de,(ptrtop)
	sbc	hl,de
	pop	hl			; (restore HL)
	jr	c,sync1			; branch if not, continue
	scf				; return with carry set
	jr	sync6

; Here if the two words are equal.
; If there's a replacement, make a new copy of the word.
sync2:
	bit	STrepl,c		; is there a replacement?
	jr	z,sync3			; branch if no
	pop	de			; DE = REPL again
	push	de
	Repl [ix,de]			; make a replacement

; Copy the new flags into the word terminator.
sync3:
	ld	l,(ix+0)		; find (possibly new) word
	ld	h,(ix+1)
sync4:					; loop to find terminator
	inc	hl			; step to next character
	bit	7,(hl)
	jr	z,sync4
	ld	(hl),c			; store new FLAGS
	set	7,(hl)			; set parity again

; Here to return with carry clear.
sync5:
	and	a			; clear carry

sync6:
	pop	de
	pop	hl
	pop	bc

	end Sync


; Word compare routine: compare words WRD1 and WRD2.
; A word is terminated either by a null or by a byte with parity set.
; Return with C set if WRD1 < WRD2, Z set if WRD1 = WRD2, else C, Z clear.
	proc WrdCmp [WRD1:hl,WRD2:de]->[]+C+Z-a
	begin
	push	bc
	push	de
	push	hl
wrd1:

; Check for string terminators
; Set B to 1 if WRD1 terminates, else 0
; Set C to 1 if WRD2 terminates, else 0
	ld	a,(hl)			; A = next character of WRD1
	ld	b,0			; assume it doesn't terminate
	rlca				; parity bit to carry
	rl	b			; set B to 1 if parity was on
	and	a			; is A a null terminator?
	jr	nz,wrd2			; branch if not
	ld	b,1			; it is, set B
wrd2:
	ld	a,(de)			; A = next character of WRD2
	ld	c,0			; assume it doesn't terminate
	rlca				; parity bit to carry
	rl	c			; set C to 1 if parity was on
	and	a			; is A a null terminator?
	jr	nz,wrd3			; branch if not
	ld	c,1			; it is, set C
wrd3:

; If both strings terminate, return with Z set.
	ld	a,b
	and	c
	jr	z,wrd4			; branch if not both strings terminate
	xor	a			; set Z, clear carry
	jr	wrd8			; go return

; If only WRD1 terminates, set C and clear Z.
wrd4:
	ld	a,b
	and	a
	jr	z,wrd5			; branch if WRD1 doesn't terminate
	ld	a,1
	and	a			; clear Z
	scf				; set C
	jr	wrd8			; go return

; If only WRD2 terminates, clear C and Z.
wrd5:
	ld	a,c
	and	a
	jr	z,wrd6			; branch if WRD2 doesn't terminate
	ld	a,1
	and	a			; clear Z and C
	jr	wrd8			; go return

; Neither string terminates.
; Compare corresponding characters.
wrd6:
	ld	a,(de)
	cp	(hl)
	jr	nz,wrd7			; branch if strings differ here
	inc	de			; advance pointers and continue
	inc	hl
	jr	wrd1

; Here if corresponding characters differ
wrd7:
	ccf				; complement carry (Z is already clear)

wrd8:
	pop	hl
	pop	de
	pop	bc

	end WrdCmp


; Flag table.
flgtbl:
	db	'a'
	db	3
	dw	0,0
	dw	0,0
aflg:	db	0
aval:	dw	adict
	dw	0
	db	'b'
	db	3
	dw	0,0
	dw	0,0
bflg:	db	0
bval:	dw	defbeg##
	dw	0
	db	'c'
	db	0
	dw	0,0
	dw	0,0
cflg:	db	0
	dw	0,0
	db	'd'
	db	3
	dw	0,0
	dw	0,0
dflg:	db	0
dval:	dw	mdict
	dw	0
	db	'e'
	db	3
	dw	0,0
	dw	0,0
eflg:	db	0
eval:	dw	defend##
	dw	0
	db	'f'
	db	3
	dw	0,0
	dw	0,0
fflg:	db	0
fval:	dw	0
	dw	0
	db	'h'
	db	0
	dw	0,0
	dw	0,0
hflg:	db	0
	dw	0,0
	db	'i'
	db	1
	dw	0,0
	dw	0,WrdSiz-1
iflg:	db	0
	dw	0
ival:	dw	DefIFLG
	db	'j'
	db	3
	dw	0,0
	dw	0,0
jflg:	db	0
jval:	dw	0
	dw	0
	db	'l'
	db	1
	dw	0,0
	dw	0,0FFFFh
lflg:	db	0
	dw	0
lval:	dw	DefLFLG
	db	'm'
	db	3
	dw	0,0
	dw	0,0
mflg:	db	0
mval:	dw	mark
	dw	0
	db	'o'
	db	3
	dw	0,0
	dw	0,0
oflg:	db	0
oval:	dw	0
	dw	0
	db	'r'
	db	0
	dw	0,0
	dw	0,0
rflg:	db	0
	dw	0,0
	db	't'
	db	0
	dw	0,0
	dw	0,0
tflg:	db	0
	dw	0,0
	db	'w'
	db	0
	dw	0,0
	dw	0,0
wflg:	db	0
	dw	0,0
	db	0

wsfmt:	db	'^\..*$',0	; WordStar format pattern
mdict:	db	'maindict',0	; default main dictionary name
adict:	db	'auxdict',0	; default auxiliary dictionary name
mark:	db	'-->',0		; default mark
NULL:	db	0,080h		; null string
RUBOUT:	db	07Fh,080h	; rubout string
CRLF:	db	13,10,0		; carriage return, line feed string

; Upper to lower case translate table.
; Table only need cover legal word characters (letters and apostrophe)
trtbl	equ	$-' '
	db	' !"#$%&',39,'()*+,-./0123456789:;<->?'
	db	'@abcdefghijklmnopqrstuvwxyz[\]^_'
	db	'`abcdefghijklmnopqrstuvwxyz{|}~'

infnm:	dw	0		; -> input file name
inch:	dw	0		; input file channel
outch:	dw	0		; output file channel
dch:	dw	0		; main dictionary channel
ach:	dw	0		; auxiliary dictionary channel
jch:	dw	0		; jargon dictionary channel
fmtpat:	dw	0		; compiled format pattern
linlen:	dw	0		; line length (includes null terminator)
linbuf:	dw	0		; -> line buffer
wrkbuf:	dw	0		; -> work buffer
wrdbuf:	dw	0		; -> word buffer
ptrbot:	dw	0		; -> bottom of pointer list
ptrtop:	dw	0		; -> top of pointer list
strbot:	dw	0		; -> bottom of string space
strtop:	dw	0		; -> top of string space
