Monday, June 3, 2002

Convert text file formats between mac/dos/unix

I seriously doubt anybody would use this nowadays, since OS X, dos2unix, and unix2dos have been around for a while. However, I am still posting it because someone asked me to write this little utility and it might still be useful to somebody.
/*
convert.c -- convert text file formats between mac/dos/unix

by Tsan-Kuang Lee (2002/06/03)

Install:
Compile it and give it a common converter name, e.g. "convert".
gcc -o convert convert.c

soft link the three versions of the converter to it, i.e.

ln -s ~/bin/convert makedos
ln -s ~/bin/convert makemac
ln -s ~/bin/convert makeunix
Make sure they are in the path

Usage:
makedos file1 >target
this will convert file1 into dos format and redirect the output into target
If >target is omitted, it prints out the file in dos format.
(In sum, if no options are given, stdin and stdout are assumed.)

similar for makeunix and makemac

Copyleft:
Do whatever you want. I wrote it in 15 minutes for my immediate need,
so it's very messy. Many quick and dirty (and ugly) lines. If you can
clean it up, I'd appreciate that.


=======================================================================

Explanation:

DOS, UNIX, and MAC text files use different line break markers.

Line break:

mac : 0d (CR)
dos : 0d 0a (CR LF)
unix : 0a (LF)

Besides, DOS files end with CTRL_Z

dos: with ^z (0x1a) at the end

(PS: this program omits the CTRL_Z because WINDOWS discards that)
*/

/*
 * Names the program may be invoked under. The end of argv[0] is compared
 * against these to pick the output format.
 */
#define MAKEUNIX "makeunix"
#define MAKEDOS "makedos"
#define MAKEMAC "makemac"

/* Line-ending conventions used for both the detected input and the target. */
enum {
    FILETYPE_UNKNOWN = 0, /* not determined yet */
    FILETYPE_UNIX    = 1, /* LF */
    FILETYPE_DOS     = 2, /* CR LF */
    FILETYPE_MAC     = 3, /* CR */
    FILETYPE_BINARY  = 4  /* input does not look like text */
};

/* DOS end-of-file marker (Ctrl-Z). */
enum { CTRL_Z = 0x1a };

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Forward declarations.
 *
 * err_report is declared with a full prototype so that calls are checked
 * against the single int message code it takes.
 */
static void err_report(int n); /* error reporting function; does not return */
static int ChkFileType(FILE *pFile); /* guesses the FILETYPE_* of a stream */
char* printtype (int filetype); /* FILETYPE_* code -> printable name */

static FILE *Fpi; /* input file pointer */

/*
 * Copy of argv[0], used in diagnostics. The buffer is fixed-size, so any
 * copy into it must be bounded by sizeof program_path.
 */
static char program_path[127];

/* Highest byte value ChkFileType still accepts as text. */
static int nCharLimit = 127;

/* Return nonzero when `name` ends with `suffix`. */
static int HasSuffix(const char *name, const char *suffix)
{
    size_t name_len = strlen(name);
    size_t suffix_len = strlen(suffix);

    return name_len >= suffix_len
        && strcmp(name + (name_len - suffix_len), suffix) == 0;
}

/* Write one line break in the requested target format to stdout. */
static void PutLineBreak(int to_filetype)
{
    switch (to_filetype) {
    case FILETYPE_MAC:
        (void) fputc(0x0d, stdout);
        break;
    case FILETYPE_DOS:
        (void) fputc(0x0d, stdout);
        (void) fputc(0x0a, stdout);
        break;
    case FILETYPE_UNIX:
        (void) fputc(0x0a, stdout);
        break;
    default:
        break;
    }
}

/*
 * Copy everything readable from `in` into an anonymous temporary file and
 * return that file positioned at its start, or NULL on failure.
 */
static FILE *SpoolToTempFile(FILE *in)
{
    char chunk[BUFSIZ];
    size_t got;
    FILE *tmp = tmpfile();

    if (tmp == NULL)
        return NULL;

    while ((got = fread(chunk, 1, sizeof chunk, in)) > 0) {
        if (fwrite(chunk, 1, got, tmp) != got) {
            (void) fclose(tmp);
            return NULL;
        }
    }

    if (ferror(in) || fseek(tmp, 0L, SEEK_SET) != 0) {
        (void) fclose(tmp);
        return NULL;
    }
    return tmp;
}

/*
 * Entry point.
 *
 * The target format is chosen from the name the program was invoked under
 * (argv[0] must end in MAKEUNIX, MAKEDOS or MAKEMAC). Input is the single
 * file named on the command line, or stdin when no argument is given; the
 * converted text goes to stdout.
 *
 * Every CR LF pair, lone CR and lone LF in the input is treated as exactly
 * one line break and rewritten in the target format, so inputs with mixed
 * line endings do not lose or duplicate breaks. Ctrl-Z bytes are dropped.
 * Input that ChkFileType does not classify as text is copied unchanged.
 *
 * Returns 0 on success and 1 on I/O failure; usage errors exit through
 * err_report.
 */
int main(int argc, char **argv)
{
    const char *invoked_as = (argc > 0 && argv[0] != NULL) ? argv[0] : "";
    int data;
    int prev_was_cr = 0;
    int failed;
    int from_filetype;
    int to_filetype = FILETYPE_UNKNOWN;

    /* Bounded copy: program_path is a fixed-size buffer used for messages. */
    (void) snprintf(program_path, sizeof program_path, "%s", invoked_as);

    /* Match against the full argv[0] so truncation cannot hide the suffix. */
    if (HasSuffix(invoked_as, MAKEUNIX))
        to_filetype = FILETYPE_UNIX;
    else if (HasSuffix(invoked_as, MAKEDOS))
        to_filetype = FILETYPE_DOS;
    else if (HasSuffix(invoked_as, MAKEMAC))
        to_filetype = FILETYPE_MAC;

    if (to_filetype == FILETYPE_UNKNOWN)
        err_report(2);

    /* parse command line, open input file if needed */
    switch (argc) {
    case 1:
        Fpi = stdin;
        break;
    case 2:
        /* Binary mode: the CR/LF bytes must reach us untranslated. */
        if ((Fpi = fopen(argv[1], "rb")) == NULL)
            err_report(1);
        break;
    default:
        err_report(0); /* emit usage message */
    }

    /*
     * ChkFileType samples the start of the stream and then seeks back.
     * That cannot work on a pipe or terminal, so buffer such input in a
     * temporary file first instead of losing the sampled bytes.
     */
    if (fseek(Fpi, 0L, SEEK_SET) != 0) {
        FILE *spooled = SpoolToTempFile(Fpi);

        if (spooled == NULL) {
            (void) fprintf(stderr, "%s: Can't buffer non-seekable input.\n",
                           program_path);
            return 1;
        }
        if (Fpi != stdin)
            (void) fclose(Fpi);
        Fpi = spooled;
    }

    from_filetype = ChkFileType(Fpi);

    if (fseek(Fpi, 0L, SEEK_SET) != 0) {
        (void) fprintf(stderr, "%s: Can't rewind input.\n", program_path);
        (void) fclose(Fpi);
        return 1;
    }

    if (from_filetype != FILETYPE_UNIX
        && from_filetype != FILETYPE_DOS
        && from_filetype != FILETYPE_MAC) {
        /* Not recognisable as text: copy verbatim rather than strip CR/LF. */
        int copied_any = 0;

        while ((data = fgetc(Fpi)) != EOF) {
            (void) fputc(data, stdout);
            copied_any = 1;
        }
        if (copied_any)
            (void) fprintf(stderr,
                           "%s: Input does not look like text; copied unchanged.\n",
                           program_path);
    } else {
        while ((data = fgetc(Fpi)) != EOF) {
            if (data == 0x0d) {
                PutLineBreak(to_filetype);
                prev_was_cr = 1;
            } else if (data == 0x0a) {
                /* The LF of a CR LF pair was already emitted with its CR. */
                if (!prev_was_cr)
                    PutLineBreak(to_filetype);
                prev_was_cr = 0;
            } else {
                prev_was_cr = 0;
                /* Ctrl-Z is 0x1a, i.e. octal 032; it is dropped. */
                if (data != '\032')
                    (void) fputc(data, stdout);
            }
        }
    }

    /*
     * No trailing Ctrl-Z is appended to DOS output: the author found that
     * Windows does not follow that rule.
     */

    failed = ferror(Fpi) != 0;
    (void) fclose(Fpi);
    if (fflush(stdout) != 0 || ferror(stdout))
        failed = 1;

    if (failed) {
        (void) fprintf(stderr, "%s: I/O error during conversion.\n",
                       program_path);
        return 1;
    }
    return 0;
}


/*
 * Print the diagnostic selected by n to stderr, using the invoked program
 * name stored in program_path, then terminate with exit status 1.
 * This function never returns.
 *
 *   n == 0  usage message
 *   n == 1  the input file could not be opened
 *   n == 2  the program was invoked under an unrecognised name
 *
 * Any other value prints nothing but still exits with status 1.
 */
static void err_report(int n)
{
    switch (n) {
    case 0:
        (void) fprintf(stderr,"Usage: %s <file> \n",program_path);
        break;
    case 1:
        (void) fprintf(stderr,"%s: Can't open input file.\n",program_path);
        break;
    case 2:
        (void) fprintf(stderr,"%s: Program filename must be MAKEUNIX, MAKEDOS, or MAKEMAC\n",program_path);
        break;
    default:
        break;
    }

    exit(1);
}




/*
 * Guess the line-ending convention of pFile from a sample of its contents.
 *
 * Reads up to 1024 bytes from the stream's current position and then seeks
 * the stream back to its start (the result of that seek is not checked).
 *
 * Returns:
 *   FILETYPE_BINARY  nothing could be read, or the sample contains a byte
 *                    above nCharLimit or a control character other than
 *                    CR, LF, TAB, VT (11), FF (12) and CTRL_Z
 *   FILETYPE_UNIX    the sample contains LF but no CR
 *   FILETYPE_MAC     the sample contains CR but no LF
 *   FILETYPE_DOS     anything else (both CR and LF, or neither)
 */
static int ChkFileType(FILE *pFile)
{
    static unsigned char sample[1024];
    const unsigned char *cursor;
    const unsigned char *sample_end;
    size_t got;
    int cr_count = 0;
    int lf_count = 0;

    got = fread(sample, 1, sizeof(sample), pFile);
    fseek(pFile, 0L, SEEK_SET);
    if (got == 0)
        return FILETYPE_BINARY;

    sample_end = sample + got;
    for (cursor = sample; cursor != sample_end; ++cursor) {
        unsigned char c = *cursor;

        if (c == '\r') {            /* 0x0d */
            ++cr_count;
        } else if (c == '\n') {     /* 0x0a */
            ++lf_count;
        } else if (c == '\t' || c == 11 || c == 12 || c == CTRL_Z) {
            continue;               /* control characters tolerated in text */
        } else if (c < ' ' || (int) c > nCharLimit) {
            return FILETYPE_BINARY;
        }
    }

    if (cr_count == 0 && lf_count != 0)
        return FILETYPE_UNIX;

    if (lf_count == 0 && cr_count != 0)
        return FILETYPE_MAC;

    return FILETYPE_DOS;
}


/*
 * Return a printable name for a FILETYPE_* code: "MAC", "DOS" or "UNIX".
 * Every other value yields "ERROR". The returned string is a literal and
 * must not be modified or freed.
 */
char* printtype (int filetype)
{
    if (filetype == FILETYPE_MAC)
        return "MAC";

    if (filetype == FILETYPE_DOS)
        return "DOS";

    if (filetype == FILETYPE_UNIX)
        return "UNIX";

    return "ERROR";
}

No comments:

Post a Comment