Listing 1: convert.c--The conversion tool
/* September 3, 1995
*
* FILE NAME: convert.c - Cross-Database ASCII Data Conversion Utility
*
* AUTHORS: Chuck Leonard
* Ed Schaefer
*
* Used to convert pipe symbol delimited files (INFORMIX like unload files)
* to Comma Separated Value (CSV) files and back again.
*
* PLATFORM COMPILATION NOTES:
*
* UNIX: cc -DUNIX convert.c -o convert
*
* DOS: No special notes.
*
* RUN NOTES:
*
* convert pcd.sql
*
* The file "pcd.sql" is the SQL schema like definition containing
* the schema of the data being converted and the name of the file
* containing the data.
*
* NOTE: the file name specified in the schema definition
* (the name following the "unload file name =" keyword)
* WILL be overwritten with the converted delimited data
* (i.e. if "pcdtablr.unl" were the name of the data file being
* converted, the resulting converted data would be written
* back to "pcdtablr.unl").
*
* Below is an example of what a ".sql" file passed to convert
* might look like:
*-----------------------------------------------------------------------------
* { TABLE "root".pcdtablr row size = 28 number of columns = 3 index size = 21 }
* { unload file name = pcdtablr.unl number of rows = 79 }
*
* create table "root".pcdtablr
* (
* prodid char(8),
* line_no char(1)
* );
* revoke all on "root".pcdtablr from "public";
*-----------------------------------------------------------------------------
*
*/
#include <stdio.h>
#include <malloc.h>
#include <string.h>
FILE *open_file(char *filename, char *ttype, int exit_flag);
void write_file(FILE *f, char *buf);
int in_string(char *string, char *substring);
void build_cols(FILE *f, int *s);
int find_col_no(FILE *f, int *s);
char *read_file(FILE *f, int *s);
void get_file_name(FILE *f, int *s, char *fbuf);
void strip_white(char *string);
void new_array(void);
int arg_type(char *col_type);
int process_data_file(char *tab_name, int conv_ch);
int chk_file_type(char *tab_name);
int char_type_cols(void);
char *build_csv_token(char *token, int token_no, char *tab_name);
char *build_unix_token(char *token, int token_no, char *tab_name);
char *xstrtok(char *line, char *delims);
void convert_date(char *old_date, char *new_date, int convert_flag);
void convert_dt(char *old_dt, char *new_dt, int precision, int convert_flag);
void del_char(char *string, char *c);
void replace(char *string, char new_char, char old_char, int convert_flag);
#ifdef UNIX
#define mvcmd "mv "
#define UNIX_SW 1
#else
#define mvcmd "rename "
#define UNIX_SW 0
#endif
#define TABLE "TABLE"
#define UNLOAD "unload"
#define EQUAL "="
#define CREATE "create table"
#define START "("
#define STOP ");"
#define PIPE_SYMBOL "|"
#define DELETE "del "
#define QUOTE "\""
#define COMMA ","
#define OUTFILE "out.fil"
#define MINUTE "minute"
#define SECOND "second"
#define UNIX_FMT "%2d%c%2d%c%2d"
#define DASH_FMT "19%02d-%02d-%02d"
#define DOS_FMT "%4d%c%2d%c%2d"
#define SLASH_FMT "%02d/%02d/%02d"
#define REC_LEN 500 /* record length */
#define INTEGER "integ" /* data types */
#define SMALLINT "small"
#define SERIAL "seria"
#define CHAR "char("
#define DATE "date"
#define DATE1 "date,"
#define DATETIME "datet"
#define DECIMAL "decim"
#define FLOAT "float"
#define CHAR_TYPE 1
#define INTEGER_TYPE 2
#define DATE_TYPE 3
#define DT_TYPE 4
#define UNK_TYPE 5
#define MIN_TYPE 1
#define SEC_TYPE 2
struct column {
char col_name[30]; /* table column name */
int dtype; /* table column data type */
int precision; /* datetime precision */
};
struct column *cptr; /* array declaration */
int arr_len=9; /* number of initial array elements*/
int arr_ele=0; /* number of filled elements */
main(argc, argv)
int argc;
char *argv[];
{
FILE *schema;
char *filename;
char runcmd[80];
char *buf;
char tab_name[30];
int stopit=0, i, conv_ch, err_no;
if(argc >= 2)
filename = argv[1];
else
{
fprintf(stderr, "\n Usage is conv <sql file>\n");
exit(0);
}
schema = open_file(filename, "r", 1);
new_array(); /* setup the array */
while (1)
{
buf = read_file(schema, &stopit);
if(stopit)
break;
if(in_string(buf, TABLE) >= 0)
{
get_file_name(schema, &stopit, tab_name);
if(stopit)
break;
arr_ele=0;
build_cols(schema, &stopit);
if((conv_ch = chk_file_type(tab_name)) > 0)
{
fprintf(stderr, "%s ", tab_name);
err_no = process_data_file(tab_name, conv_ch);
if(err_no < 0)
{
fprintf(stderr, "process_data_file conversion %s error no: %d\n",
tab_name, err_no);
continue;
}
}
else
{
if(conv_ch == (-1))
fprintf(stderr, "can NOT open data table %s\n", tab_name);
continue;
}
if(!UNIX_SW)
{ /* If not in Unix, delete the file before renaming */
strcpy(runcmd, DELETE);
strcat(runcmd, tab_name);
if(system(runcmd))
{
fprintf(stderr, "can NOT execute: %s\n", runcmd);
continue;
}
}
strcpy(runcmd, mvcmd); /* rename or move the file */
strcat(runcmd, OUTFILE);
strcat(runcmd, " ");
strcat(runcmd, tab_name);
if(system(runcmd))
{
fprintf(stderr, "can NOT execute: %s\n", runcmd);
continue;
}
}
}
fclose(schema);
fprintf(stderr, "\nProcessing completed normally\n");
exit(1);
}
/*
* FUNCTION NAME: open_file()
*
* This function opens 'filename' with 'ttype' being
* 'r' for read or 'w' for write. If there is an open error and
* 'exit flag' is true terminate else return the FILE pointer to the
* open file.
*/
FILE *open_file(char *filename, char *ttype, int exit_flag)
/* char *filename; INPUT: file name to open
char *ttype; INPUT: type: r for read, w for write
int exit_flag; INPUT: if true and error, terminate
FILE *fp; RETURN: File pointer upon successfull open
(NULL otherwise)
*/
{
FILE *fp;
if((fp = fopen(filename, ttype)) == NULL)
{
fprintf(stderr,"can NOT open %s file\n", filename);
if(exit_flag)
exit(1);
} /* End of if */
return fp;
} /* End of open_file()
/*
* FUNCTION NAME: get_file_name()
*
* This function reads a line of input until it finds the
* string "unload" and determines the filename.
*/
void get_file_name(FILE *f, int *s, char *fbuf)
/* FILE *f - FILE pointer */
/* int *s - end of file flag */
/* char *fbuf - file name */
{
char *buf;
int retval, i, cnt, len=20;
while (1)
{
buf = read_file(f, s);
if(*s)
break;
/* find the unload command */
if(in_string(buf, UNLOAD) >= 0)
{
/* find the equal sign */
if((retval = in_string(buf, EQUAL)) >= 0)
{
while (1)
/* if next position a space, ignore it*/
if(*(buf + retval + 1) == ' ')
retval++;
else
break;
cnt=0;
for(i=1; *(buf + retval + i) != ' '; i++, cnt++)
{ /* stop at the next space*/
*(fbuf + cnt) = *(buf + retval + i);
if(cnt > len)
break;
}
break;
}
}
}
*(fbuf + cnt) = '\0';
} /* End of get_file_name() */
/*
* FUNCTION NAME: read_file()
*
* This function reads one line of input from the
* previously opened FILE pointer 'f'. If EOF is hit,
* set the integer 's' to true.
*/
char *read_file(FILE *f, int *s)
/* *f - INPUT : FILE pointer */
/* *s - OUTPUT: end of file flag */
{
static char buffer[REC_LEN];
int result;
fgets(buffer, REC_LEN, f);
if(feof(f)) /* end of file */
*s=1;
result = strlen(buffer);
buffer[result - 1] = '\0';
return(buffer);
} /* End of read_file() */
/*
* FUNCTION NAME: write_file()
*
* This function writes one line of input from the
* previously opened FILE pointer 'f'.
*/
void write_file(FILE *f, char *buf)
/*
FILE *f; INPUT: file pointer to use to write to.
char *buf; INPUT: buffer of data to write.
*/
{
fprintf(f, "%s\n", buf);
} /* End of write_file() */
/*
* FUNCTION NAME: in_string()
*
* This function returns the starting position of 'substring'
* in 'string' else -1 if substring is not found.
*/
int in_string(char *string, char *substring)
/*
char *string; INPUT: string to begin search on.
char *substring; INPUT: string to search for.
*/
{
int i, j, k; /* Index variables */
for (i=0; *(string + i) != '\0'; i++)
for (j=i, k=0; *(substring + k) == *(string + j); k++, j++)
if (*(substring + k + 1) == '\0')
return(i);
return(-1); /* not found */
} /* End of in_string()
/*
* FUNCTION NAME: build_cols()
*
* This function cycles through the create table script and after
* finding the START token, parses the column information,
* colname and column type into the column array until the STOP
* postion token is found. If the number of columns of the cuurent
* table is greater than the allocated elements of array, a call to
* new_array() reallocates the column array.
*/
void build_cols(FILE *f, int *s)
/*
FILE *f; INPUT: FILE pointer
int *s; INPUT: end of file flag
*/
{
char *buf;
int start_flag=0, stop_flag=0, no_cols=0;
long pos;
char *token, col_type[8];
while (1)
{
buf = read_file(f, s);
if(*s)
break;
strip_white(buf);
if((strcmp(buf, START) == 0) && !start_flag)
{
start_flag++;
pos = ftell(f);
no_cols = find_col_no(f, s);
if(no_cols > arr_len)
{
arr_len += no_cols;
new_array();
}
if(fseek(f, pos, SEEK_SET))
{ /* reset the file pointer */
fprintf(stderr, "fseek error");
exit(0);
}
continue;
} /* End of if (strcmp(buf, START)...) */
if(strcmp(buf, STOP) == 0)
stop_flag++;
if(stop_flag && start_flag) /* break when all columns read for table*/
{
arr_ele--;
break;
} /* End of if (stop_flag...) */
if(start_flag)
{
token = strtok(buf, " ");
strcpy(cptr[arr_ele].col_name, token);
token = strtok(NULL, " ");
strncpy(col_type, token, 5); /*strip off first five characters */
col_type[5] = '\0';
if((cptr[arr_ele].dtype = arg_type(col_type)) == DT_TYPE)
{ /* datetime parsing */
token = strtok(NULL, " "); /* year token */
token = strtok(NULL, " "); /* to token */
token = strtok(NULL, " "); /* second or minute token */
/* get rid of the comma if any*/
del_char(token, COMMA);
if(strcmp(token, MINUTE) == 0)
cptr[arr_ele].precision = MIN_TYPE;
if(strcmp(token, SECOND) == 0)
cptr[arr_ele].precision = SEC_TYPE;
} /* End of if (cptr[arr_ele]...) */
arr_ele++;
} /* End of if (start_flag) */
} /* End of while */
return;
} /* End of build_cols() */
/*
* FUNCTION NAME: find_col_no()
*
* This function reads the script file counting the
* number of columns in the create table script. Keep
* counting until the STOP flag is found and return the
* number of columns in the table.
*/
int find_col_no(FILE *f, int *s)
/*
FILE *f; INPUT: FILE pointer.
int *s; INPUT: end of file flag.
*/
{
char *buf;
int cnter=0;
while (1)
{
buf = read_file(f, s);
strip_white(buf);
if(*s)
break;
if(strcmp(buf, STOP) == 0)
break;
cnter++;
} /* End of while */
return(cnter);
} /* End of find_col_no() */
/*
* FUNCTION NAME: new_array()
*
* This function reallocates the size of the column
* structure array based on the global integer arr_len.
*/
void new_array()
{
cptr = (struct column *)realloc(cptr, arr_len*sizeof(struct
column));
if(cptr == NULL)
{
fprintf(stderr, "reallocate error \n");
exit(1);
}
}
/*
* FUNCTION NAME: strip_white()
*
* This function strips leading and trailing white space
* from string.
*/
void strip_white(char *string)
{
int len;
while(1)
{
if(isspace(*string))
strcpy(string, string+1);
else
break;
}
len=strlen(string)-1;
while(isspace(*(string+len)))
len--;
*(string+len+1) = '\0';
}
/*
* FUNCTION NAME: arg_type()
*
* This function returns the integer value of the datatype:
* char - 1
* integer or smallint - 2
* date - 3
* datetime - 4
* unknown - 5
*/
int arg_type(char *col_type)
{
if(strcmp(col_type, CHAR) == 0)
return CHAR_TYPE;
if((strcmp(col_type, INTEGER) == 0) ||
(strcmp(col_type, SMALLINT) == 0) ||
(strcmp(col_type, DECIMAL) == 0) ||
(strcmp(col_type, SERIAL) == 0) ||
(strcmp(col_type, FLOAT) == 0))
return INTEGER_TYPE;
if((strcmp(col_type, DATE) == 0) || (strcmp(col_type,
DATE1) == 0))
return DATE_TYPE;
if(strcmp(col_type, DATETIME) == 0)
return DT_TYPE;
return UNK_TYPE;
}
/*
* FUNCTION NAME: process_data_file()
*
* This function:
* 1) opens the data file, tab_name, for reading
* 2) opens the intermediate output file for writing
* and for each record read:
* 3) parse each record into individual columns
* 4) and for each column, build the token depending on
* whether the 'conv_ch" flag is DOS-to-Unix or Unix-to-Dos.
*/
int process_data_file(char *tab_name, int conv_ch)
/* char *tab_name; unload file name */
/* int conv_ch; converion choice */
{
FILE *input, *output;
char *buf;
static char *outline;
char *token, *newtoken;
int s=0; /* end of file flag*/
int reclen, first_read=1, no_char_cols, field_no=0,
buf_cnt;
static int totlen=0;
input = open_file(tab_name, "r", 0);
if(input == NULL) /* can not open data file */
return 0;
output = open_file(OUTFILE, "w", 0);
if(output == NULL) /* can not open output file */
return -1;
buf_cnt=0;
while(1)
{
buf = read_file(input, &s);
if(s)
break;
buf_cnt++;
field_no=0;
reclen = strlen(buf);
no_char_cols = char_type_cols();
if((reclen + no_char_cols) > totlen)
{
totlen = reclen + no_char_cols + 5;
outline = (char *)realloc(outline, totlen*sizeof(char));
if(outline == NULL)
{
fprintf(stderr, "realloc error in process_data_file \n");
exit(1);
}
}
if(conv_ch == 1)
token = xstrtok(buf, PIPE_SYMBOL);
else
token = xstrtok(buf, COMMA);
while(1)
{
if(token == NULL)
break;
if(conv_ch == 1)
newtoken = build_csv_token(token, field_no, tab_name);
else
newtoken = build_unix_token(token, field_no, tab_name);
if(field_no == 0)
strcpy(outline, newtoken);
else
strcat(outline, newtoken);
if((field_no) <= arr_ele)
if(conv_ch == 1)
strcat(outline, ",");
else
strcat(outline, PIPE_SYMBOL);
if(conv_ch == 1)
token = xstrtok(NULL, PIPE_SYMBOL);
else
token = xstrtok(NULL, COMMA);
field_no++;
}
write_file(output, outline);
}
fclose(input);
fclose(output);
return(1);
}
/*
* FUNCTION NAME: build_csv_token()
*
* This function returns a csv token:
* If the datatype is a character type, add a
* leading and trailing double quote.
* Convert the date and datetime to their proper
* formats.
*/
char *build_csv_token(char *token, int token_no, char *tab_name)
/* char *token - token to search for */
/* int token_no - field number */
/* char *tab_name - Table name being operated on */
{
int newlen;
static int oldlen=0;
static char *new_token;
newlen = strlen(token);
if(newlen > oldlen || oldlen == 0)
{
oldlen = newlen + 10;
new_token = (char *)realloc(new_token, oldlen*sizeof(char));
if(new_token == NULL)
{
fprintf(stderr, "realloc error in build_csv_token \n");
exit(1);
}
}
switch(cptr[token_no].dtype)
{
/* put in the quotes for char type */
case CHAR_TYPE:
strcpy(new_token, QUOTE);
strcat(new_token, token);
strcat(new_token, QUOTE);
break;
/* just string copy the integer types */
case INTEGER_TYPE:
strcpy(new_token,token);
break;
/* convert the date type to the dash format */
case DATE_TYPE:
if(*token != '\0')
convert_date(token, new_token, 0);
else
*new_token = '\0';
break;
/* convert the datetime type */
case DT_TYPE:
if(*token != '\0')
convert_dt(token, new_token, cptr[token_no].precision, 0);
else
*new_token = '\0';
break;
otherwise:
strcpy(new_token, token);
fprintf(stderr, "Unknown data type table: %s column %s\n",
tab_name, cptr[token_no].col_name);
break;
}
return(new_token);
} /* End build_csv_token() */
/*
* FUNCTION NAME: build_unix_token()
*
* This function returns a unix token:
* If the datatype is a character type, delete
* the leading and trailing double quote.
* Convert the date and datetime to their proper
* formats.
*/
char *build_unix_token(char *token, int token_no, char *tab_name)
/* char *token - found token */
/* int token_no - token id */
/* char *tab_name - processing table name */
{
int newlen;
static int oldlen=0;
static char *new_token;
newlen = strlen(token);
if(newlen > oldlen)
{
oldlen = newlen + 10;
new_token = (char *)realloc(new_token, oldlen*sizeof(char));
if(new_token == NULL)
{
fprintf(stderr, "realloc error in build_csv_token \n");
exit(1);
}
}
strcpy(new_token, token);
switch(cptr[token_no].dtype)
{
case CHAR_TYPE:
/* remove the quotes for char type */
del_char(new_token, QUOTE);
break;
case INTEGER_TYPE:
/* string copy the integer types and don't change */
break;
case DATE_TYPE:
/* convert the date type */
if(*token != '\0')
convert_date(token, new_token, 1);
else
*new_token = '\0';
break;
case DT_TYPE:
/* convert the datetime type */
if(*token != '\0')
convert_dt(token, new_token, cptr[token_no].precision, 1);
else
*new_token = '\0';
break;
otherwise:
fprintf(stderr, "Unknown data type table: %s column %s\n",
tab_name, cptr[token_no].col_name);
break;
}
return(new_token);
} /* End of build_unix_token() */
/*
* FUNCTION NAME: chk_file_type()
*
* This function opens a file, reads the first record and
* if DELIMITER is found returns 1 for change to Unix, or 2
* for change to DOS.
*/
int chk_file_type(char *tab_name)
/* char *tab_name; unload file name */
{
FILE *infile;
int s=0;
char *buf;
infile = open_file(tab_name, "r", 0);
if(infile == NULL) /* can not open data file */
return -1;
buf = read_file(infile, &s);
fclose(infile);
if(s) /* no data in file */
return 0;
if(in_string(buf, PIPE_SYMBOL) >= 0)
return 1; /* change to CSV delimited */
else
return 2; /* change to Unix */
}
/*
* FUNCTION NAME: char_type_cols()
*
* This function counts the number of character type, date type,
* and datetime type columns in the array and returns an arbitrary
* number used to increase the length of a realloc'ed buffer.
*/
int char_type_cols()
{
int i, ret_val=0;
for(i=0; i <= arr_ele; i++)
{
switch(cptr[i].dtype)
{
case CHAR_TYPE:
ret_val++;
break;
case DATE_TYPE:
ret_val += 3;
break;
case DT_TYPE:
ret_val += 8;
break;
}
}
return(ret_val);
}
/*
* FUNCTION NAME: xstrtok()
*
* This function is used like the library function strtok.
* The difference is that this function will return after
* the first delimiter is found in the target string (i.e.
* considering the target string "FIELD1||FIELD3|FIELD4|",
* the first call to xstrtok() will return "FIELD1", the
* second will return "" [the NULL string], the third will
* return "FIELD3", etc.)--whereas, strtok would return
* only when data that was not a delimeter was found (i.e.
* given our same target string, "FIELD3" would have been
* the return value of the second call to strtok()).
*/
char *xstrtok(char *line, char *delims)
{
static char *saveline = '\0';
char *p;
int n;
if(line != '\0')
saveline = line;
/* stop at the end of the line */
if(saveline == '\0' || *saveline == '\0')
return('\0');
/* return the number of characters that aren't delimiters */
n = strcspn(saveline, delims);
p = saveline; /* save start of parsed token*/
saveline += n; /* step past the delimiter*/
/* step past the delimiter and terminate token*/
if(*saveline != '\0')
*saveline++ = '\0';
return(p); /* return the token*/
}
/*
* FUNCTION NAME: convert_date()
*
* This function converts date formats:
* convert flag = 0: Unix to DOS
* convert flag = 1: DOS to Unix
*/
void convert_date(char *old_date, char *new_date, int convert_flag)
/* char *old_date - */
/* char *new_date - */
/* int convert_flag - */
{
int mm, dd, yy;
char c;
switch(convert_flag)
{
case 0:
sscanf(old_date, UNIX_FMT, &mm, &c, &dd, &c, &yy);
sprintf(new_date, DASH_FMT, yy, mm, dd);
break;
case 1:
sscanf(old_date, DOS_FMT, &yy, &c, &mm, &c, &dd);
sprintf(new_date, SLASH_FMT, mm, dd, (yy - 1900));
break;
}
}
/*
* FUNCTION NAME: del_char()
*
* This function deletes char 'c' from 'string'
*/
void del_char(char *string, char *c)
/* char *string - */
/* char *c - */
{
while(*string)
{
if(*string == c[0])
strcpy(string, string+1);
else
string++;
}
}
/*
* FUNCTION NAME: convert_dt()
*
* This function converts the datetime datatype:
* convert_flag:
* 0: convert Unix to DOS
* 1: convert DOS to Unix
*/
void convert_dt(char *old_dt, char *new_dt, int precision, int
convert_flag)
/* char *old_dt - old datetime */
/* char *new_dt - new datetime */
/* int precision - datetime precision */
/* int convert_flag - */
{
switch(convert_flag)
{
case 0: /* Unix to DOS */
strcpy(new_dt, old_dt);
replace(new_dt, '-', ' ', 0);
replace(new_dt, '.', ':', 0);
if(strlen(new_dt) <= 16)
strcat(new_dt, ".00.000000");
else
strcat(new_dt, ".000000");
break;
case 1: /* DOS to Unix */
strcpy(new_dt, old_dt);
replace(new_dt, ' ', '-', 3); /* replace third dash */
replace(new_dt, ':', '.', 0);
if(precision == MIN_TYPE)
*(new_dt + 16) = '\0';
if(precision == SEC_TYPE)
*(new_dt + 19) = '\0';
break;
}
}
/*
* FUNCTION NAME: replace()
*
* This function replaces 'old_char' with 'new_char' in string.
* If the convert flag is 0, replace all occurrences of 'old_char'.
* If the flag > 0, only replace 'flag' occurrence of the character.
*/
void replace(char *string, char new_char, char old_char, int
convert_flag)
/* char *string - */
/* char new_char - character to place */
/* char old_char - character to replace */
/* int convert_flag - 0 = replace all, >0 replace only */
{
int c_flag=0;
while(*string)
{
if(*string == old_char)
c_flag++;
if(convert_flag == 0 || convert_flag == c_flag)
if(*string == old_char)
{
*string = new_char;
if(convert_flag > 0)
break;
}
string++;
}
}
|