/* http_get - fetch the contents of an http URL
**
** Originally based on a simple version by Al Globus <globus@nas.nasa.gov>.
** Debugged and prettified by Jef Poskanzer <jef@mail.acme.com>.  Also includes
** ifdefs to handle https via OpenSSL.
*/

#include "myhdr.h"


/* Maximum number of -c cookie options; main() exits when more are given. */
#define MAX_COOKIES 20
/* Everything about one request: the URL and its parsed pieces, the request
 * options collected from the command line, and the response that came back.
 * main() zeroes the whole structure before filling it in.
 */
struct url_parts {
	char *url;		/* URL as given on the command line */
	char *method;		/* "GET" or "POST" */
	char *hostname;		/* host part of url; set by getURL() to a buffer local to getURL() */
	int port;		/* port from url, else 80 (443 for https) */
	char *file;		/* rest of url starting at the first '/', or "/" */
	char *referer;		/* -r value; sent as Referer: when non-NULL */
	char *user_agent;	/* -u value; defaults to "http_get" */
	char *auth_token;	/* -a value; base64-encoded into Authorization: Basic */
	int protocol;		/* PROTO_HTTP or PROTO_HTTPS */
	int timeout;		/* argument to alarm(); -t value, default 60 */
	int ncookies;		/* number of used entries in cookies[] */
	char *cookies[MAX_COOKIES];	/* -c values; each sent as a Cookie: header */
	dynstr xmlrpc;	/* the call argument: XML built by main() from the expression after the URL, sent as the POST body */

	dynstr resp_head;	/* response header bytes collected by getURLbyParts() */
	dynstr resp_body;	/* response body bytes collected by getURLbyParts() */
	struct __obj *obj;	/* received or command line object: result of expr_parse() on the response body or on the expression argument */

	/* leftover arguments, for parsing: main() applies each one as a
	 * filter (obj_filter) to obj when printing the result */
	int argc;
	char **argv;
};

/* Forwards. */
/* Split u->url into protocol/host/port/file, then call getURLbyParts(). */
static int getURL( struct url_parts *u);
/* Send the request described by u, collect the response into u, and
 * return the numeric status parsed from the first response line. */
static int getURLbyParts( struct url_parts *u);
/* SIGALRM handler installed by main(): report the timeout and exit. */
static void sigcatch( int sig );
/* Base64-encode len bytes at ptr into space (at most size bytes);
 * returns the number of bytes stored.  No terminating NUL is written. */
static int b64_encode( char* ptr, int len, char* space, int size );

/* Globals. */
static char* argv0;	/* argv[0] as received by main(); printed by sigcatch() */
static int verbose;	/* set to 1 by -v; enables the extra trace output */

/* Transport selected by the URL scheme; stored in url_parts.protocol. */
enum proto_type {
	PROTO_HTTP = 0,		/* plain http:// */
	PROTO_HTTPS = 1		/* https:// (only recognised when built with USE_SSL) */
};


/* Program entry point.
 *
 * Command line: [options] url [expression [filter ...]]
 *
 *   -v          verbose tracing
 *   -t seconds  timeout handed to alarm() (default 60)
 *   -m method   "POST" (case-insensitive) selects POST, anything else GET
 *   -r referer  value for the Referer: header
 *   -u agent    value for the User-Agent: header (default "http_get")
 *   -a token    credentials for the Authorization: Basic header
 *   -c cookie   adds one Cookie: header, at most MAX_COOKIES times
 *
 * The option string also accepts "-s arg", but there is no handler for it,
 * so it ends up in the default branch like any unknown option.
 *
 * When an expression follows the URL it is parsed (FMT_FUNC), rendered as
 * XML into u.xmlrpc, and the method is forced to POST; any further
 * arguments are kept as filters for the result.  An empty URL or "-"
 * skips the network call and prints the parsed expression itself.
 *
 * Exit status: 0 when the server answered 200 (or no call was made),
 * 1 when no status could be read, otherwise the HTTP status as returned
 * by getURL().
 */
int
main( int argc, char** argv )
{
    struct url_parts u;
    int c, status;

    bzero(&u, sizeof(u));
    argv0 = argv[0];
    verbose = 0;
    u.timeout = 60;
    u.user_agent = "http_get";
    u.method = "GET";
    while ( (c = getopt(argc, argv, "vt:m:r:u:s:a:c:")) != -1) {
	switch (c) {
	default:
	    fprintf(stderr, "unknown argument %c\n", c);
	    usage();
	    break;
	case 'v':
	    verbose = 1; break;
	case 't':
	    u.timeout = atoi( optarg ); break;
	case 'm':
	    if (!strcasecmp(optarg, "POST"))
		u.method = "POST";
	    else 
		u.method = "GET";
	    break;
	case 'r':
	    u.referer = optarg; break;
	case 'u':
	    u.user_agent = optarg; break;
	case 'a':
	    /* The option's value is in optarg, like every other option;
	     * indexing argv by a fixed position picked up the wrong word
	     * unless -a happened to be the very first argument.
	     */
	    u.auth_token = optarg; break;
	case 'c':
	    if ( u.ncookies >= MAX_COOKIES ) {
		fprintf( stderr, "too many cookies\n");
		exit( 1 );
	    }
	    u.cookies[u.ncookies++] = optarg;
	    break;
	}
    }
	
    /* Skip past the options; argv[0] is now the URL. */
    argc -= optind;
    argv += optind;
    if (argc == 0)
	usage();
    u.url = argv[0];
    if (argc > 1 && !u.xmlrpc) {
	/* Build the request body from the expression argument. */
	dsprintf(&u.xmlrpc, "<?xml version='1.0'?>\n");
	expr_parse(argv[1], &u.obj, FMT_FUNC);
	obj_print(&u.xmlrpc, u.obj, FMT_XML);
	u.method = "POST";
	u.argc = argc - 2;
	u.argv = argv+2;
    }

    if (!*u.url || !strcmp(u.url, "-")) {
	/* empty URL, take the argument as the result */
	status = 0;
    } else { /* call the server */
	/* The request object is no longer needed; getURL() stores the
	 * parsed response in u.obj instead.
	 */
	u.obj = obj_free(u.obj);
	signal( SIGALRM, sigcatch );
	status = getURL( &u);

	if ( status == 200 )
	    status = 0;
	else if ( status == 0 )
	    status = 1;
    }
    /* now print the results */
    if (!status) {
	dynstr r = NULL;
	int i;
	obj_print(&r, u.obj, FMT_FUNC);
	/* Without filters the whole object goes to stdout. */
	if (!u.argc)
	    fprintf(stdout, "%s\n", ds_data(r));
	if (verbose) // this goes to stderr!
	    fprintf(stderr, "%s\n", ds_data(r));
	/* With filters, print one line per filter instead. */
	for (i=0; i < u.argc; i++) {
		char *filter = u.argv[i];
		dynstr s = NULL;
		obj_filter(&s, u.obj, filter, FMT_FUNC);
		if (verbose) // this goes to stderr
		    fprintf(stderr, "apply filter %d %s : %s\n", i, filter, ds_data(s));
		fprintf(stdout, "%s\n", ds_data(s));
		ds_free(s);
	}
	ds_free(r);
    }
    // XXX free all objects in u
    exit( status );
}


/* Write the command-line synopsis to stderr and terminate with status 1. */
void
usage()
{
    static const char synopsis[] =
	"usage: program [-c cookie] [-t timeout] [-r referer] [-u user-agent] [-a username:password] [-v] url\n";

    fputs( synopsis, stderr );
    exit( 1 );
}


/* Split u->url into its parts and fetch it.
 *
 * URL must be of the form http://host-name[:port]/file-name
 * (https:// is accepted too when built with USE_SSL).  The scheme is
 * matched without regard to case for both protocols.
 *
 * Fills in u->protocol, u->port and u->file, and sets u->hostname for the
 * duration of the getURLbyParts() call.  u->file points either into
 * u->url or at the literal "/".
 *
 * Exits with status 1 on a NULL or unsupported URL.  Otherwise returns
 * whatever getURLbyParts() returns.
 */
static int
getURL( struct url_parts *u)
{
    char* s, *h;
    char host[2000];
    size_t host_len;
    int proto_len;
    int status;

    if ( u->url == NULL) {
	fprintf( stderr, "null URL\n");
        exit( 1 );
    }
    if ( strncasecmp( "http://", u->url, 7 ) == 0 ) {
	proto_len = 7;
	u->protocol = PROTO_HTTP;
    }
#ifdef USE_SSL
    else if ( !strncasecmp( "https://", u->url, 8 ) ) {
	proto_len = 8;
	u->protocol = PROTO_HTTPS;
    }
#endif /* USE_SSL */
    else {
	fprintf( stderr, "unsupported URL %s\n", u->url );
        exit( 1 );
    }

    /* Get the host name: everything up to the first ':' or '/'. */
    h = u->url + proto_len;	/* the host part */
    for ( s = h; *s != '\0' && *s != ':' && *s != '/'; ++s )
	;
    host_len = (size_t) ( s - h );
    /* Over-long names are cut to what fits, leaving room for the NUL. */
    if (host_len > sizeof(host) - 1)
	host_len = sizeof(host) - 1;
    memcpy( host, h, host_len );
    host[host_len] = '\0';
    u->hostname = host;

    /* Get port number. */
    if ( *s == ':' ) {
	u->port = (unsigned short) atoi( ++s );
	while ( *s != '\0' && *s != '/' )
	    ++s;
    } else {
#ifdef USE_SSL
	if ( u->protocol == PROTO_HTTPS )
	    u->port = 443;
	else
#endif
	    u->port = 80;
    }

    /* Get the file name. */
    u->file = ( *s == '\0' ) ? "/" : s;

    status = getURLbyParts( u );

    /* host[] dies when this function returns; do not leave a dangling
     * pointer behind in the caller's structure.
     */
    u->hostname = NULL;
    return status;
}


/* States of the response-header scanner.
 * Every input character either leaves the scanner where it is or moves it
 * to another state.
 */
enum hdr_states {
	HDST_LINE1_PROTOCOL,	/* in the first word of the first line */
	HDST_LINE1_STATUS,	/* collecting the status digits */
	HDST_TEXT,		/* anywhere else inside a line */
	HDST_LF,		/* just saw LF */
	HDST_CR,		/* just saw CR */
	HDST_CRLF,		/* just saw CR LF */
	HDST_CRLFCR,		/* just saw CR LF CR */
};

/* Feed one character of the response header to the scanner.
 *
 * _s           in/out: current state; updated unless the header just ended
 * http_status  in/out: reset to 0 while in the first word of line 1, then
 *              built up digit by digit from the status field
 * c            the next character of the response
 *
 * Returns 1 when c completes the blank line that ends the header (the
 * state is left untouched in that case), 0 otherwise.
 */
static int
parse_hdr_char(enum hdr_states *_s, int *http_status, char c)
{
    const enum hdr_states cur = *_s;
    const int is_lf = ( c == '\n' );
    const int is_cr = ( c == '\r' );
    enum hdr_states next = cur;

    if ( cur == HDST_LINE1_PROTOCOL ) {
	*http_status = 0;
	if ( c == ' ' || c == '\t' )
	    next = HDST_LINE1_STATUS;	/* should come next */
	else if ( is_lf )
	    next = HDST_LF;
	else if ( is_cr )
	    next = HDST_CR;
    } else if ( cur == HDST_LINE1_STATUS ) {
	if ( c >= '0' && c <= '9' )
	    *http_status = *http_status * 10 + c - '0';
	else
	    next = is_lf ? HDST_LF : ( is_cr ? HDST_CR : HDST_TEXT );
    } else if ( cur == HDST_TEXT ) {
	if ( is_lf )
	    next = HDST_LF;
	else if ( is_cr )
	    next = HDST_CR;
    } else if ( cur == HDST_LF ) {
	if ( is_lf )
	    return 1;			/* LF LF */
	next = is_cr ? HDST_CR : HDST_TEXT;
    } else if ( cur == HDST_CR ) {
	if ( is_cr )
	    return 1;			/* CR CR */
	next = is_lf ? HDST_CRLF : HDST_TEXT;
    } else if ( cur == HDST_CRLF ) {
	if ( is_lf )
	    return 1;			/* CR LF LF */
	next = is_cr ? HDST_CRLFCR : HDST_TEXT;
    } else if ( cur == HDST_CRLFCR ) {
	if ( is_lf || is_cr )
	    return 1;			/* CR LF CR LF, or CR LF CR CR */
	next = HDST_TEXT;
    }

    *_s = next;
    return 0;
}

static int
getURLbyParts(struct url_parts *u)
{
    struct my_sock *fd;
    dynstr head_buf = NULL;
    dynstr body = NULL;
    int i, bytes, status;
    enum hdr_states header_state;
    int b = 0;
    char buf[1024];

    alarm( u->timeout );
    fd = open_client_socket( u->hostname, u->port, u->protocol == PROTO_HTTPS );

    /* Build request buffer, starting with the method. */
    alarm( u->timeout );
    dsprintf( &head_buf, "%s %s HTTP/1.0\r\n", u->method, u->file );
    /* HTTP/1.1 host header - some servers want it even in HTTP/1.0. */
    dsprintf( &head_buf, "Host: %s\r\n", u->hostname );
    if ( u->referer )
	dsprintf( &head_buf, "Referer: %s\r\n", u->referer );
    /* User-agent. */
    dsprintf( &head_buf, "User-Agent: %s\r\n", u->user_agent );
    /* Fixed headers. */
    dsprintf( &head_buf, "Accept: */*\r\n" );
    dsprintf( &head_buf, "Accept-Encoding: gzip, compress\r\n" );
    dsprintf( &head_buf, "Accept-Language: en\r\n" );
    dsprintf( &head_buf, "Accept-Charset: iso-8859-1,*,utf-8\r\n" );
    if ( u->auth_token ) {
	/* Basic Auth info. */
	char token_buf[1000];
	token_buf[b64_encode( u->auth_token, strlen( u->auth_token ), token_buf, sizeof(token_buf) )] = '\0';
	dsprintf( &head_buf, "Authorization: Basic %s\r\n", token_buf );
    }
    /* Cookies. */
    for ( i = 0; i < u->ncookies; ++i )
	dsprintf( &head_buf, "Cookie: %s\r\n", u->cookies[i] );
    if (!strcasecmp(u->method, "POST")) {
	// dsprintf( &head_buf, "Content-type: application/x-www-form-urlencoded\r\n" );
	dsprintf( &head_buf, "Content-type: text/xml\r\n" );
	// XXX put the actual length
	/* read from stdin if body is not specified */
	if (u->xmlrpc)
	    body = u->xmlrpc;
	else for (;;) {
	    bytes = read(0, buf, sizeof(buf));
	    if (bytes <= 0)
		break;
	    dsprintf(&body, buf, bytes);
	}
	dsprintf( &head_buf, "Content-length: %d\r\n", ds_len(body) );
    }
    /* Blank line. */
    dsprintf( &head_buf, "\r\n" );
    /* Now actually send it. */
    safe_write(fd, ds_data(head_buf), ds_len(head_buf));
    if (body)
	safe_write(fd, ds_data(body), ds_len(body));

    /* Get lines until a blank one. */
    alarm( u->timeout );
    header_state = HDST_LINE1_PROTOCOL;
    status = 0;
    for (;;) {
        int i = 0;
	bytes = safe_read( fd, buf, sizeof(buf) );
	if ( bytes <= 0 )
	    break;
	for ( b = 0; b < bytes; ++b ) {
	    char c = buf[b];
	    if ( verbose )
		write( 1, &c, 1 );
	    i= parse_hdr_char(&header_state, &status, c);
	    if (i)
		break;
	}
	if (b)
		dsprintf(&u->resp_head, buf, b);
	if (i)
	    break;
    }
    /* Dump out the rest of the headers buffer. */
    ++b;
    if (verbose)
	write( 1, &buf[b], bytes - b );
    if (bytes - b)
	dsprintf(&u->resp_body, buf+b, bytes - b);

    /* Copy the data. */
    for (;;) {
	if (verbose)
		fprintf(stderr, "now writing data\n");
	alarm( u->timeout );
	bytes = safe_read( fd, buf, sizeof(buf) );

	if ( bytes < 0 )
	    perror( "read" );
	if ( bytes <= 0 )
	    break;
	ds_append(&u->resp_body, buf, bytes);
	if (verbose)
	    write( 1, buf, bytes );
    }
    safe_close(fd);
    if (verbose) {
	fprintf(stderr, "resp-hdr: [\n%s\n]\n", ds_data(u->resp_head));
	fprintf(stderr, "resp-body: len %d 0x%x [\n%s\n]\n",
		ds_len(u->resp_body),
		ds_len(u->resp_body),
		ds_data(u->resp_body));
    }
    expr_parse(ds_data(u->resp_body), &u->obj, FMT_XML);
    return status;
}

/* SIGALRM handler: the timer armed with alarm() expired, so report the
 * timeout under the program's name and give up with exit status 1.
 *
 * NOTE(review): fprintf() and exit() are not async-signal-safe; this is
 * tolerable only because the process terminates here - confirm.
 */
static void
sigcatch( int sig )
{
    (void) sig;		/* only one signal is ever routed here */
    fprintf( stderr, "%s: timed out\n", argv0 );
    exit( 1 );
}


/* Base-64 encoding.  This encodes binary data as printable ASCII characters.
** Three 8-bit binary bytes are turned into four 6-bit values, like so:
**
**   [11111111]  [22222222]  [33333333]
**
**   [111111] [112222] [222233] [333333]
**
** Then the 6-bit values are represented using the characters "A-Za-z0-9+/".
*/

/* 6-bit value -> base64 character. */
static const char b64_encode_table[64] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',  /* 0-7 */
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',  /* 8-15 */
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',  /* 16-23 */
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',  /* 24-31 */
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',  /* 32-39 */
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',  /* 40-47 */
    'w', 'x', 'y', 'z', '0', '1', '2', '3',  /* 48-55 */
    '4', '5', '6', '7', '8', '9', '+', '/'   /* 56-63 */
    };

/* Base64 character -> 6-bit value, or -1 for characters outside the
 * alphabet.  Indexed by the character code as an unsigned byte.
 */
static const int b64_decode_table[256] = {
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 00-0F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 10-1F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,  /* 20-2F */
    52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,  /* 30-3F */
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,  /* 40-4F */
    15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,  /* 50-5F */
    -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,  /* 60-6F */
    41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,  /* 70-7F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 80-8F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* 90-9F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* A0-AF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* B0-BF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* C0-CF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* D0-DF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  /* E0-EF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* F0-FF */
    };

/* Do base-64 encoding on a hunk of bytes.
**
**   ptr, len     the input bytes (treated as unsigned)
**   space, size  the output buffer and its capacity
**
** Returns the number of bytes stored in space, which is never more than
** size.  No terminating NUL is written.  Base-64 encoding takes up 4/3
** the space of the original, plus a bit for end-padding; 3/2+5 gives a
** safe margin.  When the buffer is too small the output is simply cut
** short.
**
** The encoder works one input byte at a time.  A byte's leftover low bits
** are emitted as a provisional character, which the next byte then
** completes by decoding it, OR-ing in its own high bits and re-encoding.
** '=' padding is added only when the input length is not a multiple of
** three.
*/
static int
b64_encode( char* ptr, int len, char* space, int size )
{
    const unsigned char *src = (const unsigned char *) ptr;
    int ptr_idx, space_idx = 0, phase = 0;

    for ( ptr_idx = 0; ptr_idx < len; ++ptr_idx ) {
	unsigned int byte = src[ptr_idx];
	int prev;

	/* Stop as soon as the buffer is full.  This also guarantees that
	 * in phases 1 and 2 the provisional character from the previous
	 * byte really is at space[space_idx - 1].
	 */
	if ( space_idx >= size )
	    break;

	switch ( phase ) {
	case 0:
	    space[space_idx++] = b64_encode_table[byte >> 2];
	    if ( space_idx < size )
		space[space_idx++] = b64_encode_table[( byte & 0x3 ) << 4];
	    phase = 1;
	    break;
	case 1:
	    prev = b64_decode_table[(unsigned char) space[space_idx - 1]];
	    space[space_idx - 1] = b64_encode_table[prev | ( byte >> 4 )];
	    space[space_idx++] = b64_encode_table[( byte & 0xf ) << 2];
	    phase = 2;
	    break;
	case 2:
	    prev = b64_decode_table[(unsigned char) space[space_idx - 1]];
	    space[space_idx - 1] = b64_encode_table[prev | ( byte >> 6 )];
	    space[space_idx++] = b64_encode_table[byte & 0x3f];
	    phase = 0;
	    break;
	}
    }
    /* Pad with ='s: two after a lone byte, one after a pair, none when
     * the input ended on a three-byte boundary.
     */
    if ( phase != 0 )
	for ( ; phase < 3; ++phase )
	    if ( space_idx < size )
		space[space_idx++] = '=';
    return space_idx;
}
