Cover V04, I06
Article
Listing 1
Sidebar 1

nov95.tar


Listing 1: convert.c--The conversion tool

/*  September 3, 1995
*
*  FILE NAME:  convert.c - Cross-Database ASCII Data Conversion Utility
*
*     AUTHORS:  Chuck Leonard
*               Ed Schaefer
*
*  Used to convert pipe symbol delimited files (INFORMIX like unload files)
*  to Comma Separated Value (CSV) files and back again.
*
*  PLATFORM COMPILATION NOTES:
*
*    UNIX:  cc -DUNIX convert.c -o convert
*
*     DOS:  No special notes.
*
*  RUN NOTES:
*
*      convert pcd.sql
*
*    The file "pcd.sql" is the SQL schema like definition containing
*    the schema of the data being converted and the name of the file
*    containing the data.
*
*    NOTE:  the file name specified in the schema definition
*           (the name following the "unload file name =" keyword)
*           WILL be overwritten with the converted delimited data
*           (i.e. if "pcdtablr.unl" were the name of the data file being
*           converted, the resulting converted data would be written
*           back to "pcdtablr.unl").
*
*    Below is an example of what a ".sql" file passed to convert
*    might look like:
*-----------------------------------------------------------------------------
* { TABLE "root".pcdtablr row size = 28 number of columns = 3 index size = 21 }
* { unload file name = pcdtablr.unl number of rows = 79 }
*
* create table "root".pcdtablr
*  (
*    prodid char(8),
*    line_no char(1)
*  );
*  revoke all on "root".pcdtablr from "public";
*-----------------------------------------------------------------------------
*
*/

#include <stdio.h>
#include <malloc.h>
#include <string.h>

FILE *open_file(char *filename, char *ttype, int exit_flag);
void write_file(FILE *f, char *buf);
int in_string(char *string, char *substring);
void build_cols(FILE *f, int *s);
int find_col_no(FILE *f, int *s);
char *read_file(FILE *f, int *s);
void get_file_name(FILE *f, int *s, char *fbuf);
void strip_white(char *string);
void new_array(void);
int arg_type(char *col_type);
int process_data_file(char *tab_name, int conv_ch);
int chk_file_type(char *tab_name);
int char_type_cols(void);
char *build_csv_token(char *token, int token_no, char *tab_name);
char *build_unix_token(char *token, int token_no, char *tab_name);
char *xstrtok(char *line, char *delims);
void convert_date(char *old_date, char *new_date, int convert_flag);
void convert_dt(char *old_dt, char *new_dt, int precision, int convert_flag);
void del_char(char *string, char *c);
void replace(char *string, char new_char, char old_char, int convert_flag);

#ifdef UNIX
#define mvcmd  "mv "
#define UNIX_SW 1
#else
#define mvcmd  "rename "
#define UNIX_SW 0
#endif

#define TABLE "TABLE"
#define UNLOAD "unload"
#define EQUAL "="
#define CREATE "create table"
#define START "("
#define STOP ");"
#define PIPE_SYMBOL "|"
#define DELETE "del "

#define QUOTE "\""
#define COMMA ","
#define OUTFILE "out.fil"
#define MINUTE  "minute"
#define SECOND  "second"
#define UNIX_FMT  "%2d%c%2d%c%2d"
#define DASH_FMT  "19%02d-%02d-%02d"
#define DOS_FMT   "%4d%c%2d%c%2d"
#define SLASH_FMT "%02d/%02d/%02d"
#define REC_LEN  500       /* record length */
#define INTEGER  "integ"   /* data types */
#define SMALLINT "small"
#define SERIAL   "seria"
#define CHAR     "char("
#define DATE     "date"
#define DATE1    "date,"
#define DATETIME "datet"
#define DECIMAL  "decim"
#define FLOAT    "float"

#define CHAR_TYPE     1
#define INTEGER_TYPE  2
#define DATE_TYPE     3
#define DT_TYPE       4
#define UNK_TYPE      5
#define MIN_TYPE      1
#define SEC_TYPE      2

struct column {
char col_name[30]; /* table column name */
int dtype;         /* table column data type */
int precision;     /* datetime precision */
};
struct column *cptr;  /* array declaration */
int arr_len=9;        /* number of initial array elements*/
int arr_ele=0;        /* number of filled elements */

main(argc, argv)
int argc;
char *argv[];
{
FILE *schema;
char *filename;
char runcmd[80];
char *buf;
char tab_name[30];
int stopit=0, i, conv_ch, err_no;

if(argc >= 2)
filename = argv[1];
else
{
fprintf(stderr, "\n Usage is conv <sql file>\n");
exit(0);
}

schema = open_file(filename, "r", 1);
new_array();   /* setup the array */
while (1)
{
buf = read_file(schema, &stopit);
if(stopit)
break;
if(in_string(buf, TABLE) >= 0)
{
get_file_name(schema, &stopit, tab_name);
if(stopit)
break;
arr_ele=0;
build_cols(schema, &stopit);
if((conv_ch = chk_file_type(tab_name)) > 0)
{
fprintf(stderr, "%s ", tab_name);
err_no = process_data_file(tab_name, conv_ch);
if(err_no < 0)
{
fprintf(stderr, "process_data_file conversion %s error no: %d\n",
tab_name, err_no);
continue;
}
}
else
{
if(conv_ch == (-1))
fprintf(stderr, "can NOT open data table %s\n", tab_name);
continue;
}

if(!UNIX_SW)
{ /* If not in Unix, delete the file before renaming */
strcpy(runcmd, DELETE);
strcat(runcmd, tab_name);
if(system(runcmd))
{
fprintf(stderr, "can NOT execute: %s\n", runcmd);
continue;
}
}
strcpy(runcmd, mvcmd); /* rename or move the file */
strcat(runcmd, OUTFILE);
strcat(runcmd, " ");
strcat(runcmd, tab_name);
if(system(runcmd))
{
fprintf(stderr, "can NOT execute: %s\n", runcmd);
continue;
}
}
}
fclose(schema);
fprintf(stderr, "\nProcessing completed normally\n");
exit(1);
}

/*
*  FUNCTION NAME:  open_file()
*
*  This function opens 'filename' with 'ttype' being
*  'r' for read or 'w' for write.  If there is an open error and
*  'exit flag' is true terminate  else return the FILE pointer to the
*  open file.
*/

FILE *open_file(char *filename, char *ttype, int exit_flag)

/* char  *filename;   INPUT:  file name to open
char  *ttype;      INPUT:  type: r for read, w for write
int   exit_flag;   INPUT:  if true and error, terminate
FILE  *fp;        RETURN:  File pointer upon successfull open
(NULL otherwise)

*/
{
FILE *fp;

if((fp = fopen(filename, ttype)) == NULL)
{
fprintf(stderr,"can NOT open %s file\n", filename);
if(exit_flag)
exit(1);
} /* End of if */

return fp;

} /* End of open_file()

/*
*  FUNCTION NAME:  get_file_name()
*
* This function reads a line of input until it finds the
* string "unload" and determines the filename.
*/
void get_file_name(FILE *f, int *s, char *fbuf)
/* FILE *f    -  FILE pointer     */
/* int *s     -  end of file flag */
/* char *fbuf -  file name        */
{
char *buf;
int retval, i, cnt, len=20;

while (1)
{
buf = read_file(f, s);
if(*s)
break;
/* find the unload command */

if(in_string(buf, UNLOAD) >= 0)
{
/* find the equal sign */
if((retval = in_string(buf, EQUAL)) >= 0)
{
while (1)
/* if next position a space, ignore it*/
if(*(buf + retval + 1) == ' ')
retval++;
else
break;
cnt=0;
for(i=1; *(buf + retval + i) != ' '; i++, cnt++)
{ /* stop at the next space*/
*(fbuf + cnt) = *(buf + retval + i);
if(cnt > len)
break;
}
break;
}
}

}
*(fbuf + cnt) = '\0';
} /* End of get_file_name() */

/*
*  FUNCTION NAME:  read_file()
*
* This function reads one line of input from the
* previously opened FILE pointer 'f'.  If EOF is hit,
* set the integer 's' to true.
*/
char *read_file(FILE *f, int *s)
/* *f -  INPUT :  FILE pointer     */
/* *s -  OUTPUT:  end of file flag */
{
static char buffer[REC_LEN];
int result;

fgets(buffer, REC_LEN, f);
if(feof(f)) /* end of file */
*s=1;
result = strlen(buffer);
buffer[result - 1] = '\0';

return(buffer);
} /* End of read_file() */

/*
*   FUNCTION NAME:  write_file()
*
* This function writes one line of input from the
* previously opened FILE pointer 'f'.
*/

void write_file(FILE *f, char *buf)

/*
FILE *f;      INPUT:  file pointer to use to write to.
char *buf;    INPUT:  buffer of data to write.
*/

{
fprintf(f, "%s\n", buf);
}  /* End of write_file() */

/*
*   FUNCTION NAME:  in_string()
*
* This function returns the starting position of 'substring'
* in 'string' else -1 if substring is not found.
*/

int in_string(char *string, char *substring)

/*
char *string;      INPUT:  string to begin search on.
char *substring;   INPUT:  string to search for.
*/

{
int i, j, k;  /* Index variables */

for (i=0; *(string + i) != '\0'; i++)
for (j=i, k=0; *(substring + k) == *(string + j); k++, j++)
if (*(substring + k + 1) == '\0')
return(i);

return(-1); /* not found */

}  /* End of in_string()

/*
*   FUNCTION NAME:  build_cols()
*
* This function cycles through the create table script and after
* finding the START token, parses the column information,
* colname and column type into the column array until the STOP
* postion token is found.  If the number of columns of the cuurent
* table is greater than the allocated elements of array, a call to
* new_array() reallocates the column array.
*/

void build_cols(FILE *f, int *s)

/*
FILE *f;  INPUT:  FILE pointer
int *s;   INPUT:  end of file flag
*/

{
char *buf;
int start_flag=0, stop_flag=0, no_cols=0;
long pos;
char *token, col_type[8];

while (1)
{
buf = read_file(f, s);
if(*s)
break;
strip_white(buf);
if((strcmp(buf, START) == 0) && !start_flag)
{
start_flag++;
pos = ftell(f);
no_cols = find_col_no(f, s);
if(no_cols > arr_len)
{
arr_len += no_cols;
new_array();
}
if(fseek(f, pos, SEEK_SET))
{ /* reset the file pointer */
fprintf(stderr, "fseek error");
exit(0);
}
continue;
}  /* End of if (strcmp(buf, START)...) */
if(strcmp(buf, STOP) == 0)
stop_flag++;
if(stop_flag && start_flag) /* break when all columns read for table*/
{
arr_ele--;
break;
} /* End of if (stop_flag...) */
if(start_flag)
{
token = strtok(buf, " ");
strcpy(cptr[arr_ele].col_name, token);
token = strtok(NULL, " ");
strncpy(col_type, token, 5); /*strip off first five characters */
col_type[5] = '\0';
if((cptr[arr_ele].dtype = arg_type(col_type)) == DT_TYPE)
{  /* datetime parsing */
token = strtok(NULL, " "); /* year token */
token = strtok(NULL, " "); /* to token */
token = strtok(NULL, " "); /* second or minute token */
/* get rid of the comma if any*/
del_char(token, COMMA);
if(strcmp(token, MINUTE) == 0)
cptr[arr_ele].precision = MIN_TYPE;
if(strcmp(token, SECOND) == 0)
cptr[arr_ele].precision = SEC_TYPE;
}  /* End of if (cptr[arr_ele]...) */
arr_ele++;
}  /* End of if (start_flag) */
}  /* End of while */

return;

}  /* End of build_cols() */

/*
*   FUNCTION NAME:  find_col_no()
*
* This function reads the script file counting the
* number of columns in the create table script.  Keep
* counting until the STOP flag is found and return the
* number of columns in the table.
*/

int find_col_no(FILE *f, int *s)

/*
FILE *f;  INPUT:  FILE pointer.
int *s;   INPUT:  end of file flag.
*/

{
char *buf;
int cnter=0;

while (1)
{
buf = read_file(f, s);
strip_white(buf);
if(*s)
break;
if(strcmp(buf, STOP) == 0)
break;
cnter++;
}  /* End of while */

return(cnter);

}  /* End of find_col_no() */

/*
*  FUNCTION NAME:  new_array()
*
* This function reallocates the size of the column
* structure array based on the global integer arr_len.
*/
void new_array()
{
cptr = (struct column *)realloc(cptr, arr_len*sizeof(struct
column));
if(cptr == NULL)
{
fprintf(stderr, "reallocate error \n");
exit(1);
}
}
/*
*  FUNCTION NAME:  strip_white()
*
* This function strips leading and trailing white space
* from string.
*/
void strip_white(char *string)

{
int len;

while(1)
{
if(isspace(*string))
strcpy(string, string+1);
else
break;
}

len=strlen(string)-1;
while(isspace(*(string+len)))
len--;

*(string+len+1) = '\0';
}
/*
*  FUNCTION NAME:  arg_type()
*
* This function returns the integer value of the datatype:
*    char                - 1
*    integer or smallint - 2
*    date                - 3
*    datetime            - 4
*    unknown             - 5
*/
int arg_type(char *col_type)

{
if(strcmp(col_type, CHAR) == 0)
return CHAR_TYPE;
if((strcmp(col_type, INTEGER) == 0) ||
(strcmp(col_type, SMALLINT) == 0) ||
(strcmp(col_type, DECIMAL) == 0) ||
(strcmp(col_type, SERIAL) == 0) ||
(strcmp(col_type, FLOAT) == 0))
return INTEGER_TYPE;
if((strcmp(col_type, DATE) == 0) || (strcmp(col_type,
DATE1) == 0))
return DATE_TYPE;
if(strcmp(col_type, DATETIME) == 0)
return DT_TYPE;
return UNK_TYPE;
}
/*
*  FUNCTION NAME:  process_data_file()
*
* This function:
*    1) opens the data file, tab_name, for reading
*    2) opens the intermediate output file for writing
*    and for each record read:
*    3) parse each record into individual columns
*    4) and for each column, build the token depending on
*    whether the 'conv_ch" flag is DOS-to-Unix or Unix-to-Dos.
*/
int process_data_file(char *tab_name, int conv_ch)
/* char *tab_name;   unload file name */
/* int conv_ch;      converion choice */
{
FILE *input, *output;
char *buf;
static char *outline;
char *token, *newtoken;
int s=0;  /* end of file flag*/
int reclen, first_read=1, no_char_cols, field_no=0,
buf_cnt;
static int totlen=0;

input = open_file(tab_name, "r", 0);
if(input == NULL) /* can not open data file */
return 0;
output = open_file(OUTFILE, "w", 0);
if(output == NULL) /* can not open output file */
return -1;
buf_cnt=0;
while(1)
{
buf = read_file(input, &s);
if(s)
break;
buf_cnt++;
field_no=0;
reclen = strlen(buf);
no_char_cols = char_type_cols();
if((reclen + no_char_cols) > totlen)
{
totlen = reclen + no_char_cols + 5;
outline = (char *)realloc(outline, totlen*sizeof(char));
if(outline == NULL)
{
fprintf(stderr, "realloc error in process_data_file \n");
exit(1);
}
}
if(conv_ch == 1)
token = xstrtok(buf, PIPE_SYMBOL);
else
token = xstrtok(buf, COMMA);
while(1)
{
if(token == NULL)
break;

if(conv_ch == 1)
newtoken = build_csv_token(token, field_no, tab_name);
else
newtoken = build_unix_token(token, field_no, tab_name);

if(field_no == 0)
strcpy(outline, newtoken);
else
strcat(outline, newtoken);

if((field_no) <= arr_ele)
if(conv_ch == 1)
strcat(outline, ",");
else
strcat(outline, PIPE_SYMBOL);

if(conv_ch == 1)
token = xstrtok(NULL, PIPE_SYMBOL);
else
token = xstrtok(NULL, COMMA);
field_no++;
}
write_file(output, outline);
}
fclose(input);
fclose(output);
return(1);
}
/*
*  FUNCTION NAME:  build_csv_token()
*
* This function returns a csv token:
*   If the datatype is a character type, add a
*   leading and trailing double quote.
*   Convert the date and datetime to their proper
*   formats.
*/
char *build_csv_token(char *token, int token_no, char *tab_name)
/* char *token    - token to search for */
/* int token_no   - field number */
/* char *tab_name - Table name being operated on */
{
int newlen;
static int oldlen=0;
static char *new_token;

newlen = strlen(token);
if(newlen > oldlen || oldlen == 0)
{
oldlen = newlen + 10;
new_token = (char *)realloc(new_token, oldlen*sizeof(char));
if(new_token == NULL)
{
fprintf(stderr, "realloc error in build_csv_token \n");
exit(1);
}
}
switch(cptr[token_no].dtype)
{
/* put in the quotes for char type */
case CHAR_TYPE:
strcpy(new_token, QUOTE);
strcat(new_token, token);
strcat(new_token, QUOTE);
break;
/* just string copy the integer types */
case INTEGER_TYPE:
strcpy(new_token,token);
break;
/* convert the date type to the dash format */
case DATE_TYPE:
if(*token != '\0')
convert_date(token, new_token, 0);
else
*new_token = '\0';
break;
/* convert the datetime type */
case DT_TYPE:
if(*token != '\0')
convert_dt(token, new_token, cptr[token_no].precision, 0);
else
*new_token = '\0';
break;
otherwise:
strcpy(new_token, token);
fprintf(stderr, "Unknown data type table: %s column %s\n",
tab_name, cptr[token_no].col_name);
break;
}
return(new_token);
} /* End build_csv_token() */

/*
*  FUNCTION NAME:  build_unix_token()
*
* This function returns a unix token:
*   If the datatype is a character type, delete
*   the leading and trailing double quote.
*   Convert the date and datetime to their proper
*   formats.
*/
char *build_unix_token(char *token, int token_no, char *tab_name)
/* char *token     - found token */
/* int token_no    - token id    */
/* char *tab_name  - processing table name */
{
int newlen;
static int oldlen=0;
static char *new_token;

newlen = strlen(token);
if(newlen > oldlen)
{
oldlen = newlen + 10;
new_token = (char *)realloc(new_token, oldlen*sizeof(char));
if(new_token == NULL)
{
fprintf(stderr, "realloc error in build_csv_token \n");
exit(1);
}
}
strcpy(new_token, token);
switch(cptr[token_no].dtype)
{
case CHAR_TYPE:
/* remove the quotes for char type */
del_char(new_token, QUOTE);
break;
case INTEGER_TYPE:
/* string copy the integer types and don't change */
break;
case DATE_TYPE:
/* convert the date type */
if(*token != '\0')
convert_date(token, new_token, 1);
else
*new_token = '\0';
break;
case DT_TYPE:
/* convert the datetime type */
if(*token != '\0')
convert_dt(token, new_token, cptr[token_no].precision, 1);
else
*new_token = '\0';
break;
otherwise:
fprintf(stderr, "Unknown data type table: %s column %s\n",
tab_name, cptr[token_no].col_name);
break;
}
return(new_token);
} /* End of build_unix_token() */

/*
*  FUNCTION NAME:  chk_file_type()
*
* This function opens a file, reads the first record and
* if DELIMITER is found returns 1 for change to Unix, or 2
* for change to DOS.
*/
int chk_file_type(char *tab_name)
/* char *tab_name;  unload file name */
{
FILE *infile;
int s=0;
char *buf;

infile = open_file(tab_name, "r", 0);
if(infile == NULL) /* can not open data file */
return -1;
buf = read_file(infile, &s);
fclose(infile);
if(s)  /* no data in file */
return 0;

if(in_string(buf, PIPE_SYMBOL) >= 0)
return 1; /* change to CSV delimited */
else
return 2; /* change to Unix */
}
/*
*  FUNCTION NAME:  char_type_cols()
*
* This function counts the number of character type, date type,
* and datetime type columns in the array and returns an arbitrary
* number used to increase the length of a realloc'ed buffer.
*/
int char_type_cols()
{
int i, ret_val=0;

for(i=0; i <= arr_ele; i++)
{
switch(cptr[i].dtype)
{
case CHAR_TYPE:
ret_val++;
break;
case DATE_TYPE:
ret_val += 3;
break;
case DT_TYPE:
ret_val += 8;
break;
}
}
return(ret_val);
}
/*
*  FUNCTION NAME:  xstrtok()
*
* This function is used like the library function strtok.
* The difference is that this function will return after
* the first delimiter is found in the target string (i.e.
* considering the target string "FIELD1||FIELD3|FIELD4|",
* the first call to xstrtok() will return "FIELD1", the
* second will return "" [the NULL string], the third will
* return "FIELD3", etc.)--whereas, strtok would return
* only when data that was not a delimeter was found (i.e.
* given our same target string, "FIELD3" would have been
* the return value of the second call to strtok()).
*/
char *xstrtok(char *line, char *delims)

{
static char *saveline = '\0';
char *p;
int n;

if(line != '\0')
saveline = line;

/* stop at the end of the line */
if(saveline == '\0' || *saveline == '\0')
return('\0');
/* return the number of characters that aren't delimiters */
n = strcspn(saveline, delims);
p = saveline; /* save start of parsed token*/

saveline += n; /* step past the delimiter*/

/* step past the delimiter and terminate token*/
if(*saveline != '\0')
*saveline++ = '\0';

return(p); /* return the token*/
}
/*
*  FUNCTION NAME:  convert_date()
*
* This function converts date formats:
*   convert flag = 0: Unix to DOS
*   convert flag = 1: DOS to Unix
*/
void convert_date(char *old_date, char *new_date, int convert_flag)
/* char *old_date   -  */
/* char *new_date   -  */
/* int convert_flag -  */
{
int mm, dd, yy;
char c;

switch(convert_flag)
{
case 0:
sscanf(old_date, UNIX_FMT, &mm, &c, &dd, &c, &yy);
sprintf(new_date, DASH_FMT, yy, mm, dd);
break;
case 1:
sscanf(old_date, DOS_FMT, &yy, &c, &mm, &c, &dd);
sprintf(new_date, SLASH_FMT, mm, dd, (yy - 1900));
break;
}
}
/*
*  FUNCTION NAME:  del_char()
*
* This function deletes char 'c' from 'string'
*/
void del_char(char *string, char *c)
/* char *string  -  */
/* char *c       -  */
{
while(*string)
{
if(*string == c[0])
strcpy(string, string+1);
else
string++;
}
}
/*
*  FUNCTION NAME:  convert_dt()
*
* This function converts the datetime datatype:
* convert_flag:
*  0:  convert Unix to DOS
*  1:  convert DOS to Unix
*/
void convert_dt(char *old_dt, char *new_dt, int precision, int
convert_flag)
/* char *old_dt     - old datetime       */
/* char *new_dt     - new datetime       */
/* int precision    - datetime precision */
/* int convert_flag -                    */
{
switch(convert_flag)
{
case 0: /* Unix to DOS */
strcpy(new_dt, old_dt);
replace(new_dt,  '-', ' ', 0);
replace(new_dt,  '.', ':', 0);
if(strlen(new_dt) <= 16)
strcat(new_dt, ".00.000000");
else
strcat(new_dt, ".000000");
break;
case 1: /* DOS to Unix */
strcpy(new_dt, old_dt);
replace(new_dt,  ' ', '-', 3); /* replace third dash */
replace(new_dt,  ':', '.', 0);
if(precision == MIN_TYPE)
*(new_dt + 16) = '\0';
if(precision == SEC_TYPE)
*(new_dt + 19) = '\0';
break;
}
}
/*
*  FUNCTION NAME:  replace()
*
* This function replaces 'old_char' with 'new_char' in string.
* If the convert flag is 0, replace all occurrences of 'old_char'.
* If the flag > 0, only replace 'flag' occurrence of the character.
*/
void replace(char *string, char new_char, char old_char, int
convert_flag)
/* char *string      -                                  */
/* char new_char     - character to place               */
/* char old_char     - character to replace             */
/* int convert_flag  - 0 = replace all, >0 replace only */
{
int c_flag=0;

while(*string)
{
if(*string == old_char)
c_flag++;
if(convert_flag == 0 || convert_flag == c_flag)
if(*string == old_char)
{
*string = new_char;
if(convert_flag > 0)
break;
}
string++;
}
}