/*

Hello,

This program:  Word Clean

Author:  Brian D. Otte
         919 Fremont Street
         Kiel WI  53042-4821
         U.S.A.

        otteb59@mail.tcbi.com
        Tel. (920) 894-4013

Release Date:   11/20/1997
Release Version:11/20/1997

Purpose:Removes end of line hyphens and concatenates remainder of word
        from next line.  Also removes any character which is not an
        ASCII character greater than 31 and less than ASCII 127.

Input:  *.txt file input as input redirection.
Output: to screen (or to file via out redirection)

Author's note:
This code compiles under Borland 3.0 C++.  I offer no warranty whatsoever.
If you want to use this code, by all means have at it.  Do you have any
suggestions, comments, or job offers?  E-mail me


What does this prog do?
Two main things:

1) example segments of input and output

Input                    Output
english-speaking   -> english-speaking       //no change
1986-1992          -> 1986-1992              //no change
-- unknown foo --  -> -- unknown foo --      //no change
aid of their co-   -> aid of their country.  //concatenates hyphenated word
untry.

2) Removes all characters which are not greater than ASCII 31 and less than
	ASCII 127.  This results in one stream of text.
*/


#include <iostream.h>
#include <string.h>

//Capacity, in chars, of the fixed-size word buffers (terminator included).
#define TOKEN_LENGTH (128)//Wow is a word this long?
//Values for boolean flags.
#define YES            (1)
#define NO             (0)


//Filters token in place, keeping only characters in ASCII 32..126, and
//returns token.  string_length is the number of characters in token.
char* clean_up_word(char* token,short string_length);

/*
 Reads whitespace-separated words from standard input, strips every
 character outside ASCII 32..126 from each one, and writes the words to
 standard output separated by single spaces, followed by one newline.

 A word of two or more characters that ends in exactly one hyphen is taken
 to be the first half of a word broken across lines: the hyphen is removed
 and the next word is written directly after it with no space in between.
 Words ending in "--" are left alone.  Because each word is written as soon
 as it has been read, the joined word never has to fit in a buffer, and a
 word broken across several lines is joined all the way through.

 Input tokens of TOKEN_LENGTH characters or more are read in pieces of at
 most TOKEN_LENGTH-1 characters; each piece is then treated as a word.

 Returns 0.
*/
int main(){

	char word[TOKEN_LENGTH];
	short join_next=NO;//YES while a lopped hyphen is waiting for its other half
	size_t length;

	for(;;){
		//width() caps the extraction at TOKEN_LENGTH-1 chars plus the
		//terminator; it resets after every extraction, so set it each time.
		cin.width(TOKEN_LENGTH);
		if(!(cin >> word))
			break;//end of input (or read error): nothing was stored in word

		clean_up_word(word,(short)strlen(word));
		length=strlen(word);

		if(length>=2 && word[length-1]=='-' && word[length-2]!='-'){
			word[length-1]='\0';//lop that hyphen
			cout << word;       //no space: the next word continues this one
			join_next=YES;
		}
		else{
			cout << word << ' ';
			join_next=NO;
		}
	}//end for(;;)  All words have been read in

	//The input ended on a hyphenated fragment: there is nothing to join it
	//to, so give the hyphen back instead of losing it.
	if(join_next)
		cout << "- ";

	cout <<'\n';

return 0;
}//end main


/*
 Walks through a word char by char and removes, in place, every character
 that is not printable ASCII (codes 32 through 126).

 token          buffer holding the word; it is overwritten with the
                filtered, '\0'-terminated result.
 string_length  number of characters of token to examine (normally
                strlen(token)).  A value of zero or less yields an empty
                string.

 Returns token.

 The filtering is done inside token itself: the write position never passes
 the read position, so no scratch buffer is needed and the result is always
 terminated, even when no character survives.
*/
char* clean_up_word(char* token,short string_length){

	short source;
	short kept=0;

	for (source=0;source<string_length;++source){
		if(token[source]>=' ' && token[source]<='~')//space .. tilde
			token[kept++]=token[source];
	}//end for (source=0;source<string_length;++source)

	token[kept]='\0';//terminate
	return token;
}// end clean_up_word (char token)
