Comment ~
Date: Thu, 18 Jan 1990  21:14 MST
From: Keith Petersen <w8sdz@WSMR-SIMTEL20.ARMY.MIL>
To: kirsch@arsocomvax.socom.mil
Subject: TOADTRIM

David, time for another Toad Hall program: TOADTRIM.  Trims trailing
spaces from each line in a text file.  Seems to me this is far better
done in MASM than C or Pascal.

--Keith

Comment	ends	~

TITLE	TOADTRIM	[900119]

;Yeah, we can do that ...
;David Kirschbaum
;Toad Hall
;kirsch@arsocomvax.socom.mil


CR	EQU	13			;ASCII carriage return
LF	EQU	10			;ASCII line feed

;Nominal size of one read into the input buffer: half of 0E800H, i.e.
;a little under half of the 64Kb segment left over after the code.
;The design allows the input data to run somewhat past this, since the
;reader is meant to carry on to the end of the current line.
;The output buffer is placed after the input data, so the two together
;need about twice the input size.  Nothing in this definition itself
;guards against the buffers growing up into the stack.
BUFFSIZ	equ	0E800H / 2

;Everything (code, data, stack) is addressed through the one segment CSEG,
;with execution starting at offset 100H (COM-program layout).
CSEG	SEGMENT PUBLIC PARA 'CODE'
	ASSUME	CS:CSEG, DS:CSEG, ES:CSEG, SS:CSEG

;Back up into the PSP command-line area to borrow its last word.
	org	80H+126			;end of PSP cmdline

;No storage is emitted here: "outstart" simply names the word at
;offset 0FEH-0FFH, directly below the 100H program start.
outstart label	word			;let's use it for a variable

;Code proper begins here.
	ORG	100H

;ToadTrim - program entry point.
;
;Copies the input (the file Startup opened, else StdIn) to StdOut,
;leaving out every run of spaces that sits directly in front of a CR.
;
;The work is done one chunk at a time:
; 1. Read up to BUFFSIZ bytes at INBUFF.
; 2. Unless the chunk already ends with a CR, keep reading single bytes
;    until it does (or EOF, or the spill allowance is used up), so that
;    a chunk normally never stops in mid-line.
; 3. Copy the chunk to the output buffer, which starts right behind the
;    last input byte.  Spaces are only counted, not copied; the count
;    is written back out as spaces when a normal char follows, and is
;    thrown away when a CR follows.
; 4. Write the output buffer to StdOut and go back for more.
;
;Exit is always through DOS function 4CH: AL = 0 at end of input,
;or whatever DOS left in AL after a failed open/read/write.
;
;Assumes the direction flag is clear (LODSB/STOSB count upward).
ToadTrim	proc	near

;Room beyond BUFFSIZ that step 2 may use to finish off a line.
;Output never exceeds input, so the two buffers together need at most
;2 * MAXINPUT (0F800H) bytes above INBUFF, which leaves the top of the
;64Kb segment for the stack.
SPILLMAX equ	0800H
MAXINPUT equ	BUFFSIZ + SPILLMAX

handle	label	word			;input handle is kept on top of the
					;CALL below (which only ever runs once)
	call	StartUp			;one-time setup, fills in handle


BuffLup:
	mov	dx,INBUFF		;input buffer base
	mov	cx,BUFFSIZ		;read a buffer full
	mov	bx,handle		;input file handle
	mov	ah,3FH			;read from file/device
	int	21H
	jb	Terminate		;read failed, die
	or	ax,ax			;anything read?
	jz	Terminate		;nope, end of input (AL = 0)

	mov	si,dx			;SI => input buffer start
	mov	di,dx
	add	di,ax			;DI => last input buffer char + 1

;We want the chunk to end with an EOL, whether the read came back full
;or short.  (Skipping this for a full read would let a run of spaces
;that straddles two chunks get dropped from the middle of a line.)

Find_Eol:
	cmp	byte ptr [di-1],CR	;is last char a CR?
	jz	Got_Eol			;yep

	mov	ax,di
	sub	ax,si			;AX = chars in the buffer so far
	cmp	ax,MAXINPUT		;spill room used up?
	jae	Got_Eol			;yep, settle for what we have rather
					;than run the buffers into the stack

;Read one more byte directly behind the data we already have.
	mov	cx,1			;one char
	mov	dx,di			;place to read next char
	mov	ah,3FH			;read from file/device
	int	21H			;(BX is still handle)
	jb	Terminate		;read failed, die
	or	ax,ax			;anything read?
	jz	Got_Eol			;nope, must be EOF
	 inc	di			;yep, DOS already stored the char;
					;just step past it (AL is the byte
					;count here, not the char!)
	 jmp	short Find_Eol		;loop back to retest for CR EOL

;Input buffer ends with CR, or we've hit EOF, or the spill limit.
;SI => input buffer start
;DI => last input buffer char + 1
;
Got_Eol:
	mov	cx,di			; -> last char+1
	sub	cx,si			;minus buffer base = input char count
	jcxz	Terminate		;nothing there (can't really happen)

;Output buffer starts where the input data stops.
	mov	outstart,di		;remember output starting ofs
	xor	bx,bx			;BX = spaces seen but not yet copied

;Per-char loop.  CX = input chars left, SI => next input char,
;DI => next free output byte, BX = pending space count.
CharLup:
	lodsb				;snarf input buffer char
	cmp	al,20H			;space?
	jnz	Not_Space		;nope
	 inc	bx			;yep, just count it for now
	 jmp	short Skip_Others	;and copy nothing

Not_Space:
	cmp	al,CR			;hit CR?
	jz	Dump_Spaces		;yep, pending spaces were trailing ones

;Normal char, so the spaces we held back belong in the output.
	or	bx,bx			;any spaces pending?
	jz	Skip_Spaces		;nope, nothing to do

	xchg	cx,bx			;CX = nr spaces to put back
					;BX = input char counter, parked
	mov	ah,al			;save this non-space char
	mov	al,20H			;a space
	rep	stosb			;stuff back the held spaces (CX -> 0)
	mov	al,ah			;recover non-space char
	xchg	cx,bx			;CX = input char counter again
					;BX = 0 (space counter reset)
Dump_Spaces:
	xor	bx,bx			;forget pending spaces at a CR
Skip_Spaces:
	stosb				;copy the char itself (CR included)

Skip_Others:

	loop	CharLup			;for entire input buffer
;DI -> last output buffer char +1
;Spaces still pending here are dropped along with the count.

	mov	dx,outstart		;output buffer starting ofs
	mov	cx,di			;last output char ofs + 1
	sub	cx,dx			; minus starting ofs = char count
	mov	bx,1			;StdOut
	mov	ah,40H			;write to file/device
	int	21H
	jb	Terminate		;write failed, die same as for a read
					;(a short write without carry, e.g.
					; disk full, is not detected)
	jmp	BuffLup			;back to read another buffer-full

Terminate:
	mov	ah,4CH			;terminate (AL = any error,or 0)
	int	21H			;Let DOS close the output file

ToadTrim	endp

;Input buffer base = first even offset at or past the end of the code
;above, worked out without MASM's "even" directive: add 1, then clear
;bit 0 by shifting right and back left.  (Shifting left first and then
;right would merely give offset + 1, which is not necessarily even.)
INBUFF	equ	( ( ($ - CSEG) +1) SHR 1) SHL 1

;Startup - one-time command line handling, called once from ToadTrim.
;
;Decides where input comes from by looking at the PSP command tail
;(length byte at 80H, text from 81H):
;  - empty command tail			-> show usage, exit
;  - nothing but blanks			-> StdIn (handle 0)
;  - first word is a lone "?"		-> show usage, exit
;  - anything else			-> first word is the input filename
;
;Exit:	handle = 0 for StdIn, or the handle of the opened file.
;	Does not return when usage is shown (errorlevel 0) or when the
;	open fails (goes to Terminate with DOS's error code in AL).
;Uses:	AX, BX, DX, SI, flags; clears the direction flag.
Startup	proc	near

	cld				;string ops count upward from here on
	mov	si,80H			;PSP cmdline length byte
	xor	ax,ax			;handy 0
	mov	handle,ax		;init handle to StdIn
	lodsb				;AX = cmdline length, SI = 81H

;When he's using redirection
;StdIn alone ("<") produces a command line length of 1 (space, CR)
;StdOut alone (">"): same result.
;StdIn and StdOut ("<wherever >wherever") produces a command line
;length of 2 (space, space, CR).
;A question mark alone would produce a command line length of 2
;(space, "?", CR).
;So: blanks only means redirection, and the length by itself can't
;tell "?" or a one-letter filename from the two-space case.

	or	ax,ax			;no cmdline?
	jz	Usage			; nope, give him help

	mov	bx,si
	add	bx,ax			;BX -> CR at cmdline end

;Step over leading spaces and tabs.
Skip_Blanks:
	cmp	si,bx			;run out of cmdline?
	jae	Started			; yep, blanks only: stay with StdIn
	lodsb
	cmp	al,20H			;space?
	jz	Skip_Blanks
	cmp	al,09H			;tab?
	jz	Skip_Blanks

	dec	si			;SI -> first char of first word
	mov	dx,si			;DX -> same (filename start for open)

;Find the end of the word: next space/control char, or cmdline end.
Find_End:
	inc	si
	cmp	si,bx			;at the terminating CR?
	jae	Got_End			; yep
	cmp	byte ptr [si],20H	;still inside the word?
	ja	Find_End		; yep, keep looking
Got_End:
	mov	byte ptr [si],0		;AsciiZ the word in place

;A word consisting of just "?" (now "?",0) is a cry for help.
	mov	si,dx
	cmp	word ptr [si],'?'	;'?' in low byte, 0 in high byte
	jz	Usage			;It's help, all right

	mov	ax,3D00H		;open file, read-only (DX -> name)
	int	21H
	jb	Terminate		;open failed, die

	mov	handle,ax		;save real file handle
Started:
	ret

Usage:
	mov	dx,offset usage$
	mov	ah,9			;display msg
	int	21H
	mov	ax,4C00H		;terminate, errorlevel 0
	int	21H

Startup	endp

;Help text shown by the Usage path in Startup through DOS function 9,
;hence the closing '$'.
usage$	db 'TOADTRIM Trims trailing spaces from a (hopefully) text file,',CR,LF
	db '	sends trimmed result to Std Out.',CR,LF,CR,LF
	db 'Usage:',CR,LF
	db '	TOADTRIM filename.txt [>out device or filename]',CR,LF,CR,LF
	db '	Works with Std In also:',CR,LF
	db '	TOADTRIM <filename.txt [>output device or filename]',CR,LF,'$'

;End of the program's only segment; execution starts at ToadTrim.
CSEG	ENDS
	END	ToadTrim
