#! /usr/bin/perl

# DBF2CSV.PL -- a program to convert .DBF files to .CSV format
# This program is uncopyrighted, so do with it whatever you wish.
# By Dave Burton, 5/28/00.
# Burton Systems Software, POB 4157, Cary, NC 27519-4157.
# email: dburton@salzo.cary.nc.us or dburton@burtonsys.com


$| = 1;
$debugmode = 0;
$prompt = 0;  # set to 1 for special interactive mode, for use with perl4w.exe

# Perl pack/unpack format representing the structure of the first
# 32 bytes in a .DBF file:
$DBF_header_fmt = "C" .   # version number at offset
                  "CCC" . # YY, MM, DD (one byte each)
                  "L" .   # Number of records in file
                  "S" .   # Length of header structure
                  "S" .   # Length of each record
                  "a20";  # 20 bytes that we don't care about

# Perl pack/unpack format representing the structure of each field descriptor
# (the 2nd-Nth 32-byte chunk):
$DBF_field_desc_fmt = "A11" .  # Field name in 0-terminated ASCII
                      "a" .  # Field type in ASCII
                      "L" .  # Field address in memory (unused)
                      "C" .  # Field length (binary)
                      "C" .  # Decimal count (binary)
                      "a" .  # reserved
                      "C" .  # Work area ID
                      "a" .  # reserved
                      "C" .  # Flag for SET FIELDS
                      "a7" . # reserved
                      "A";   # Index field flag


$cvt_failed = 0;  # set to 1 iff cvt1file failed


# Read infile (name of a .DBF file) and create outfile (name of a .CSV file).
#
sub cvt1file {
   local( $infile, $outfile ) = @_;
   local( $recnum ) = 0;
   local( $skipped ) = 0;

   $cvt_failed = 0;

   if (!open( DBF, $infile )) {
      print "Could not open '$infile', $!\n";
      $cvt_failed = 1;
      return 0;  # no records converted
   }
   binmode DBF;

   unlink $outfile;  # mostly in case outfile is on a buggy MARS_NWE volume, so we don't get trailing junk in the output file if it already existed and was bigger than the new output file

   if (!sysread(DBF, $buf, 32)) {
      print "Couldn't read 32 bytes from '$infile', $!\n";
      $cvt_failed = 1;
      return 0;  # no records converted
   }

   ( $verNum, $yy, $mm, $dd, $numrecs, $hdrLen, $recLen ) = unpack( $DBF_header_fmt, $buf );
   print "vernum=$verNum  ";
   printf "yy/mm/dd=%02d/%02d/%02d  ", $yy,$mm,$dd;
   print "numrecs=$numrecs\n";
   print "hdrLen=$hdrLen  ";
   print "recLen=$recLen  ";
   $numfields = (($hdrLen - 1) / 32) - 1;
   print "numfields=$numfields (per record)\n";

   $recfmt = "A";  # first byte of each record is the "deleted" indicator byte (normally blank)

   $running_offset = 1;
   $fld_nam[0] = '';
   $fld_ofs[0] = 0;
   $fld_len[0] = 1;
   $fld_typ[0] = 'C';

   for ($i=1; $i <= $numfields; $i++) {
      if (!sysread(DBF, $buf, 32)) {
          print "Couldn't read header for field $i from '$infile', $!\n";
          $cvt_failed = 1;
          return 0;  # no records converted
       }

      ( $fldName, $fldType, $fldOffset, $fldLen ) = unpack( $DBF_field_desc_fmt, $buf );
      if ($debugmode) {
         printf "%3d: %-10s type='%s' offset=%d fldLen=%d\n", $i, $fldName, $fldType, $fldOffset, $fldLen;
      }
      if ($running_offset != $fldOffset) {
         print "ERROR: running calculated offset, $running_offset, does not match field offset, $fldOffset.\n";
      }
      $fld_nam[$i] = $fldName;
      $fld_ofs[$i] = $fldOffset;
      $fld_len[$i] = $fldLen;
      $fld_typ[$i] = $fldType;
      $recfmt .= "A$fldLen";
      $running_offset += $fldLen;
   } #for
   $outfmt = '"%s"' . (',"%s"' x ($numfields-1)) . "\n";  # note: the DelFlg fields won't be output

   if (!sysread(DBF, $buf, 1)) {
      print "Couldn't read terminator byte from '$infile', $!\n";
      $cvt_failed = 1;
      return 0;  # no records converted
   }
   ("\r" eq $buf) || print "ERROR: Terminator byte at offset %d is not 0x0D\n", $recLen-1;

   if (!open( OUTP, ">$outfile" )) {
      print "Could not create '$outfile', $!\n";
      $cvt_failed = 1;
      return 0;  # no records converted
   }

   # echo field names to console:
   @tmp = @fld_nam;
   shift @tmp;  # don't output field #0 (the 'deleted' flag)
   foreach $fld (@tmp) {
      # remove leading and trailing whitespace and any embedded quote marks
      $fld =~ s/\s*(\S((.*\S)|)|)\s*/$1/;
      $fld =~ s/\"/\`/g;
   }
   print "The $numfields fields in each record are:\n";
   printf $outfmt,@tmp;

   # output field names as first line in output file:
   printf OUTP $outfmt,@tmp;

   $recnum = 0;
   while (sysread(DBF, $buf, $recLen)) {
      $recnum++;
#      if ($recnum > 5000) {
#         last;  # for debugging
#      }
      if (0 == ($recnum % 2000)) {
         print '.';
      }
      if ($buf =~ /\~/) {
         print "Warning: changed '~' to blank in record $recnum\n";
         $buf =~ tr/\~/ /;
         if ($buf =~ /\~/) {
            print "ERR: failed changed '~' to blank in record $recnum\n";
            exit 1;
         }
      }
      @fields = unpack( $recfmt, $buf );
      foreach $fld (@fields) {
         if ($fld =~ /[\s\"]/) {
            # remove leading and trailing whitespace and any embedded quote marks
            $fld =~ s/\s*(\S((.*\S)|)|)\s*/$1/;
            $fld =~ s/\"/\`/g;
         }
      }
      $deleted_flag = shift @fields;
      if ($deleted_flag ne '') {
         $skipped++;
         print "Warning: record $recnum is marked for delete; $skipped records skipped.\n";
         next;
      }
      printf OUTP $outfmt,@fields;

   } #while

   close OUTP;
   close DBF;
   if ($recnum >= 2000) {
      print "\n";  # because progress-indicator dots were printed
   }

  if ($recnum != $numrecs) {
     print "Warning: file should have had $numrecs records, but actually had $recnum records.\n";
  }
  return $recnum;

} #cvt1file


$errlevel = 0;


sub do_a_file {
   local( $infile ) = @_;
   local( $numRecords, $outFile );
   if ($inFile !~ /\.dbf$/i) {
      if ("-d" eq $infile) {
         $debugmode = 1 - $debugmode;
      } else {
         printf "ERROR: input file name '$inFile' does not end in '.dbf'\n";
         $errlevel = 2;
      }
   } else {
      $outFile = $inFile;
      if ($inFile =~ /\.DBF$/) {
         # if input file name was upper-case, make output file name upper-case, too
         $outFile =~ s/\.DBF$/\.CSV/i;
      } else {
         $outFile =~ s/\.dbf$/\.csv/i;
      }
      print "infile='$inFile'  outfile='$outFile'\n";
      $numRecords = &cvt1file( $inFile, $outFile );
      if ($cvt_failed) {
         $errlevel = 2;
      } else {
         $numRecords++;  # add one for the first record, with the field names in it
         print "Created $outFile from $inFile, $numRecords records.\n";
      }
   }
} #do_a_file


if (($#ARGV+1) < 1) {
   print "DBF2CSV -- Convert .DBF file to .CSV (comma-separated) format\n" .
         "\n" .
         "Usage:\n" .
         "   perl4s dbf2csv.pl infile.dbf ...\n" .
         "or (if you have a 32-bit Perl installed):\n" .
         "   perl dbf2csv.pl infile.dbf ...\n" .
         "or (to run interactively under Windows and prompt for the .dbf file):\n" .
         "   perl4w dbf2csv.pl -p\n" .
         "\n" .
         "For each infile.dbf, an infile.csv will be created.\n" .
         "(a dot will printed for every 2000 records, as a progress indicator)\n" .
         "\n" .
         "There will be one more record in infile.csv than infile.dbf, because the\n" .
         "field names are written to infile.csv as the first record.\n" .
         "\n" .
         "(Add -d to enable debug prints, e.g., \"perl4s dbf2csv.pl -d infile.dbf\".)\n" .
         "\n" .
         "Note: Unless you use a 32-bit version Perl instead of perl4s.exe or perl4w.exe,\n" .
         "this program will require \"8.3\" (short) file names.  So you will have to\n" .
         "rename your files if they have long file names.\n" .
         "\n" .
         "-by Dave Burton \<dave\@burtonsys.com\> 1-919-481-0149  (uncopyrighted)\n";
   exit 1;
} else {
   if ("-d" eq $ARGV[0]) {
      $debugmode = 1;
      shift @ARGV;
   }
   if ("-p" eq $ARGV[0]) {
      # Special interactive mode for use with Perl4w.exe
      $prompt = 1;
      shift @ARGV;
   }
   while ($#ARGV >= 0) {
      $inFile = $ARGV[0];
      shift @ARGV;
      &do_a_file( $inFile );
   } #while
   if ($prompt) {
      print "DBF2CSV -- Convert .DBF file to .CSV (comma-separated) format\n" .
            "\n" .
            "For each .dbf input file that you specify, an output file will be\n" .
            "created with the same name but a .csv extension.\n" .
            "\n" .
            "For big files, a dot will printed for every 2000 records, as a\n" .
            "progress indicator.  There is no limit on the size of the files,\n" .
            "but big files may take a long time to convert.\n" .
            "\n" .
            "There will be one more record in the .csv output file than in\n" .
            "the .dbf file, because the field names are written to infile.csv\n" .
            "as the first record.\n" .
            "\n" .
            "(Enter -d to enable debug prints.)\n" .
            "\n" .
            "Note that you might have to specify the full path of each .dbf file.\n";
      do {
         print "\n";
         if ($debugmode) {
            print "[debug mode enabled]\n";
         }
         print "Convert what .DBF file?  (or press Enter alone to quit) ";
         $inFile = <STDIN>;
         # remove leading and trailing whitespace (especially the CR at the end)
         $inFile =~ s/\s*(\S((.*\S)|)|)\s*/$1/;
         if ('' ne $inFile) {
            &do_a_file( $inFile );
         }
      } until ('' eq $inFile);
   }

   if (! $prompt) {
      exit $errlevel;
   }
   # An idiosyncracy of Perl4w.exe is that if you exit by dropping off
   # the end of the program it closes the Window, but if you exit by
   # calling 'exit' or 'die' then it leaves the window open.  Since we
   # want the window to close, we don't call 'exit'.
}

__END__

