; srt.xm: file sort unicum
; /AJK 8.Aug.81, 27.Aug.81

;    _______
;   |      /
;   |     /
;   |    /    Copyright (c) 1981 by Knowlogy
;   |   //\                         PO Box 283
;   |  //  \                        Wilsonville, Oregon  97070
;   | //    \
;   |//______\

	uses LIB2800
	uses LIB2801

; Version 1 is strictly an in-memory sort.  Very large files cause the
; program to abort.  Version 2 will sort files larger than available
; memory.

LinLen	equ	256		; max acceptable input line length

; The file is sorted by forming its lines into a binary tree.
; The structure of each tree node is as follows:
TRlf	equ	0		; (word) -> left daughter
				; this becomes "-> parent" during output
TRrt	equ	2		; (word) -> right daughter
TRpt	equ	4		; (word) -> position in data to compare from
				; low byte becomes "visited" flag during output
TRcnt	equ	6		; (word) count of occurrences
TRstr	equ	8		; null terminated string starts here

; Scalars involved in the sort are:
; head -- points to the top node of the tree; zero for empty tree
; last -- points to the last line inserted if its path involves only right
;         daughters, otherwise contains zero.

; As a heuristic, when a line is inserted which comes after all other lines
; so far encountered, then the next line of input is first compared with that
; line before it begins its descent through the tree.  This prevents the
; quadratic behavior exhibited by a pure tree sort of already-sorted data.

	db	'SRT V1: COPYRIGHT (C) 1981 BY KNOWLOGY',13,10,26,0

	entry srt
srt:

; Initialize and scan command.
	HEAhea [hl=0100h]	; initialize stack and heap
	USKall [hl=LinLen]->[(linptr)=hl] ; allocate the line buffer
	USKini []		; scan commands
	USKflg [hl=flgtbl]	; scan flags
	USKdef [stk="-",stk=0]	; default argument is "-"
	USKgna []->[hl]+C-a	; get first argument
	jr	c,srt1		; branch if no first argument
	ex	de,hl		; save argument in DE
	USKgna []->[hl]+C-a	; ensure no second argument
	jr	c,srt2
srt1:
	EPUTF [stk="Usage: srt [-c] [-i] [-r] [-u] [-k nn] [file] [>output.file]^m^j"]
	SHLexi [a=1]

; Here with input file name in DE.  "-" means read from standard input.
; Open the input file.
srt2:
	ex	de,hl		; HL -> input filename again
	ld	(fname),hl	; store filename pointer
	xor	a		; assume channel 0 input
	STRcmp [de="-",hl=hl]->[]+C+Z ; see if standard input is selected
	jr	z,srt3		; branch if so, just read from channel 0
	IO.opn [stk=hl,stk=RO+Text+OldOnly]->[a]+C ; open the file
	jp	c,srt25		; branch if unsuccessful open
srt3:
	ld	(inch),a	; stow the input channel

; Top of loop to read and sort a line.
srt4:
	LIOgl [stk=(inch),stk=(linptr),stk=LinLen]->[]+C
	jp	c,srt14		; branch when end-of-file encountered

; Given the column at which the key begins, as indicated by the -k flag,
; find the position in the string at which to begin comparing.
	ld	ix,(linptr)	; IX -> string
	ld	de,(kflg)	; DE = column number at which to start compare
	dec	de		; decrement, so first column is number 0
	ld	bc,0		; BC records current position
srt5:
	ld	h,b		; see if we've come to the desired column
	ld	l,c
	and	a		; (clear carry for subtract)
	sbc	hl,de		; clear carry if we've arrived at the column
	jr	nc,srt6		; branch when desired column achieved
	ld	a,(ix+0)	; A = next character to consider
	and	a		; check for end-of-string
	jr	z,srt6		; branch when encountered, stop there
	inc	ix		; advance past this character
	inc	bc		; step the column number
	cp	'I'-64		; is it a tab?
	jr	nz,srt5		; if not, no extra positioning is necessary
	ld	hl,7		; advance BC to next (8-column) tab stop
	add	hl,bc		; HL = (old position) + 8
	ld	a,l		; round down to a multiple of 8
	and	NOT 7
	ld	b,h		; put rounded value in BC
	ld	c,a
	jr	srt5		; go test new position
srt6:				; here when IX -> character at which to compare
	ld	(cmptr),ix	; save comparison pointer

; Ready to compare.  See if we should compare with the last-inserted node.
; If so and if the new line is "greater" than the last, we can immediately
; insert the new line as the right daughter of the old; if it's "equal",
; we can immediately bump the counter for the last node.
	ld	hl,(last)	; HL -> last-inserted node or zero
	ld	a,h
	or	l
	jr	z,srt9		; branch if no last-inserted node
	push	hl
	pop	ix		; IX -> last-inserted node
	call	cmp		; compare line with last node
	jr	c,srt8		; branch if line is "less than" last node
	jr	nz,srt7		; branch if line is "greater"
	ld	l,(ix+TRcnt)	; they're equal, just bump the node's count
	ld	h,(ix+TRcnt+1)
	inc	hl
	ld	(ix+TRcnt),l
	ld	(ix+TRcnt+1),h
	jp	srt4
srt7:				; here if line is greater than last node
	call	alc		; allocate the new node
	ld	(ix+TRrt),l	; make it last node's daughter
	ld	(ix+TRrt+1),h
	ld	(last),hl	; new node is now "last-inserted" node
	jp	srt4		; done with this node

; Here if line is "less than" old node.  Clear "last inserted node" pointer
srt8:
	ld	hl,0
	ld	(last),hl

; Here to begin descending through the tree.
srt9:
	ld	ix,head		; IX -> tree head pointer
	xor	a		; clear "left descent" flag
	ld	(lftflg),a	; .. nonzero if a left descent ever occurs

; Here with IX -> a pointer cell, either "head" or a left or right daughter
; pointer.  If the cell is now empty, we install a new node there,
; otherwise we compare with the node stored there and either bump its
; counter or descend to one of its daughters.
srt10:
	ld	l,(ix+0)
	ld	h,(ix+1)	; HL = contents of pointer cell
	ld	a,h
	or	l
	jr	nz,srt11	; branch if somebody's there
	call	alc		; else install a node there
	ld	(ix+0),l
	ld	(ix+1),h

; If the path we took to arrive here involved no descents through left
; daughters, then we are at the "last" line in the tree.  Set the "last"
; pointer to address this node.
	ld	a,(lftflg)	; check "left descent" flag
	and	a
	jp	nz,srt4		; branch if a left descent occurred
	ld	(last),hl	; else remember this as new last node
	jp	srt4		; done with this node

; Here if the daughter is there; compare the current node with it.
; If the new line is "less than" the established one, descend through its
; left daughter; if the new line is "greater than" the established one,
; descend through its right daughter; if the new line is "equal to" to
; established one, bump the established node's counter.
srt11:
	push	hl
	pop	ix		; now IX -> cell against which to compare
	call	cmp		; compare current node with tree node
	jr	c,srt12		; branch if "less than", descend left
	jr	nz,srt13	; branch if "greater", descent right

; Lines are equal, just bump count.
	ld	l,(ix+TRcnt)
	ld	h,(ix+TRcnt+1)
	inc	hl
	ld	(ix+TRcnt),l
	ld	(ix+TRcnt+1),h
	jp	srt4		; done with this line

; Descend through left daughter.
srt12:
	ld	a,1		; set "left-descent" flag
	ld	(lftflg),a
	ld	bc,TRlf
	add	ix,bc		; IX -> left daughter pointer cell
	jr	srt10

; Descend through right daughter.
srt13:
	ld	bc,TRrt
	add	ix,bc		; IX -> right daughter point cell
	jr	srt10

; Here when file has been read and sorted.
; Close the input file.  Then if the -i flag was given, open it for output,
; otherwise specify that output will go to channel 1.
srt14:
	ld	a,1		; assume output goes to channel 1
	ld	(ouch),a
	ld	a,(inch)	; see if we read from standard input
	and	a
	jr	z,srt15		; if so, do no file manipulation
	IO.cls [stk=(inch)]->[a,hl]+C ; close the input file
	jp	c,srt25		; branch if unsuccessful close
	ld	a,(iflg)	; see if we should overwrite input file
	and	a
	jr	z,srt15		; branch if not, write to standard output
	ld	hl,(fname)	; HL -> argument file name
	IO.opn [stk=hl,stk=WO+Replace+Text]->[a]+C
	jp	c,srt25		; branch if file can't be opened
	ld	(ouch),a	; store output channel number

; Perform an in-order walk of the binary tree: visit the top node, where
; "visit" is defined recursively to mean visit the node's left daughter,
; write the node's information to the output file, then visit the node's
; right daughter.
; In order to avoid using a stack to retain our current tree path, we
; change each node's "left daughter" pointer into a "parent" pointer, and
; zero the "parent" of the top node.  We also change the low byte of the
; "compare position" pointer to zero when we have printed the node's line and
; are traversing the right node.
srt15:
	ld	hl,(head)	; HL -> top node
	ld	a,h		; check for empty tree
	or	l
	jp	z,srt24		; branch if tree is empty, nothing to write
	push	hl
	pop	ix		; IX -> top node
	ld	bc,0		; BC = (non-existent) "parent" of top node

; Here to visit a node.
; IX -> node to visit.
; BC -> node's parent.
srt16:
	ld	(ix+TRpt),1	; indicate that the node has not been visited
	ld	l,(ix+TRlf)
	ld	h,(ix+TRlf+1)	; HL -> node's left daughter
	ld	(ix+TRlf),c
	ld	(ix+TRlf+1),b	; overwrite left daughter pointer with parent
	ld	a,h		; see if left daughter exists
	or	l
	jr	nz,srt22	; branch if it does, visit it

; Here to print the node's information.
; IX -> node.
srt17:
	ld	c,(ix+TRcnt)
	ld	b,(ix+TRcnt+1)	; BC = node's count
	ld	a,(cflg)	; are we giving counts?
	and	a
	jr	z,srt18		; branch if not
	OPUTF [stk="%6u: ",stk=bc] ; give the count.  NOTE THAT THIS MUST
				; OCCUPY EIGHT COLUMNS OR TABS WON'T BE RIGHT!
	jr	srt19		; go print the line
srt18:				; here if -c not specified
	ld	a,(uflg)	; are we printing lines uniquely?
	and	a
	jr	z,srt20		; branch if not, produce the line n times
srt19:				; here if -c or -u
	ld	bc,1		; print line only once
srt20:				; here with BC = number of times to print line
	push	ix		; print the line for this node
	pop	hl
	ld	de,TRstr
	add	hl,de		; HL -> line to output
srt21:
	LIOpl [stk=(ouch),stk=hl] ; put the line
	dec	bc		; count down
	ld	a,b		; loop until count becomes zero
	or	c
	jr	nz,srt21
	ld	(ix+TRpt),0	; indicate that the node has been visited
	ld	l,(ix+TRrt)
	ld	h,(ix+TRrt+1)	; HL -> node's right daughter
	ld	a,h		; see if right daughter exists
	or	l
	jr	z,srt23		; branch if it doesn't, don't visit it

; Here to descend to visit a daughter.
; IX -> parent.
; HL -> daughter.
srt22:
	push	hl		; (SP) -> daughter node
	ex	(sp),ix		; IX -> new node to visit, (SP) -> parent
	pop	bc		; BC -> parent of node to visit
	jr	srt16		; go visit the daughter

; Here to ascend to a parent.
; IX -> current node.
srt23:				; here when done visiting a node
	ld	l,(ix+TRlf)
	ld	h,(ix+TRlf+1)	; HL -> node's parent
	ld	a,h		; stop when we've ascended above treetops
	or	l
	jr	z,srt24
	push	hl
	pop	ix		; IX -> node's parent
	ld	a,(ix+TRpt)	; see if we've visited this node
	and	a
	jr	z,srt23		; if so, ascend to the grandparent
	jr	srt17		; else go visit and do right daughter

; Here when all done.
srt24:
	SHLexi [a=0]

; Here on error.
srt25:
	ERRMSG [a=a,b=1,c=1,hl=hl]


; Alc routine: allocate a node for the current line.
; Exit with HL -> new node.
; Destroys AF, BC, DE.
alc:
	STRlng [hl=(linptr)]->[hl] ; HL = length of new line
	ld	bc,TRstr+1	; add room for pointers and null delimiter
	add	hl,bc
	USKall [hl=hl]->[hl]	; allocate a node
	push	hl
	ex	(sp),ix		; save IX, set IX -> new node
	xor	a		; zero the daughter pointers
	ld	(ix+TRlf),a
	ld	(ix+TRlf+1),a
	ld	(ix+TRrt),a
	ld	(ix+TRrt+1),a
	ld	(ix+TRcnt),1	; set count to 1
	ld	(ix+TRcnt+1),a
	ld	de,TRstr	; compute offset to comparison point
	add	hl,de		; now HL -> beginning of string
	push	hl		; save that address
	ld	de,(cmptr)
	add	hl,de		; now HL = comparison point + addr(line)
	ld	de,(linptr)
	and	a		; (clear carry for subtract)
	sbc	hl,de		; now HL -> comparison point
	ld	(ix+TRpt),l	; store it
	ld	(ix+TRpt+1),h
	pop	hl		; HL -> beginning of string again
	STRcpy [de=(linptr),hl=hl,bc=LinLen] ; copy in the string
	ex	(sp),ix		; restore IX, stack address of node
	pop	hl		; HL -> new node
	ret

; Cmp routine: compare current line with a node in the tree.
; Enter with IX -> tree node.
; Exit with carry set if line is less than tree node;
;           carry clear if line is greater than or equal;
;           zero flag set if line is equal.
; Destroys AF, BC, DE, HL.
; First, compare from the comparison columns.  If the keys are
; equal, then compare the entire lines.
cmp:
	ld	e,(ix+TRpt)
	ld	d,(ix+TRpt+1)	; DE -> comparison position in established node
	STRcmp [de=de,hl=(cmptr)]->[]+Z+C ; set C if "less than", Z if "equal"
	jr	nz,cmp1		; branch if keys aren't equal
	ld	hl,(kflg)	; equal, see if we're comparing from column 1
	dec	hl
	ld	a,h
	or	l		; (clears carry)
	ret	z		; if so, entire lines are equal, return
	push	ix
	pop	hl		; HL -> old node
	ld	bc,TRstr	; BC = node offset to start of line
	add	hl,bc		; -> line in old node
	ex	de,hl		; DE -> line in old node
	STRcmp [de=de,hl=(linptr)]->[]+Z+C ; compare entire lines
	ret	z		; if they're still equal, return
				; otherwise continue with new inequality
cmp1:
	ld	a,(rflg)	; A is 1 if -r was specified
	rl	b		; invert carry if -r was specified
	xor	b
	or	2		; clear Z flag
	rra			; set carry if current node "less"
	ret

; Flag table.
flgtbl:
	db 'c',0,0,0,0,0,0,0,0,0,0,0,0
cflg:	dw 0
	db 'i',0,0,0,0,0,0,0,0,0,0,0,0
iflg:	dw 0
	db 'k',1
	dw 0,1
	dw 0,LinLen
	db 0,0,0
kflg:	dw 1
	db 'r',0,0,0,0,0,0,0,0,0,0,0,0
rflg:	dw 0
	db 'u',0,0,0,0,0,0,0,0,0,0,0,0
uflg:	dw 0
	db 0

head:	dw	0		; -> head of tree
last:	dw	0		; -> last inserted node if rightmost line

lftflg:	ds	1		; left insertion flag
inch:	ds	1		; input channel number
ouch:	ds	1		; output channel number
linptr:	ds	2		; -> line input buffer
fname:	ds	2		; -> filename
cmptr:	ds	2		; -> this line's comparison point
currnt:	ds	2		; -> current node

	end srt
