reading input from text file into array of structures in c

fatih picture fatih · Oct 24, 2012 · Viewed 9.8k times · Source

My structure definition is,

typedef struct {
    int taxid;
    int geneid;
    char goid[20];
    char evidence[4];
    char qualifier[20];
    char goterm[50];
    char pubmed;
    char category[20];    
} gene2go;

I have tab-seperated text file called `"gene2go.txt".

Each line of this file contains taxID, geneID, goID, evidence, qualifier, goterm, pubmed and category information.

Each line of the file will be kept in a structure.

When the program is run, it will first read the content of the input file into an array of type gene2go, I used a function called readInfo.

The program will also take the following input arguments from the command line,

input type ( 1 for taxid, 2 for geneid, 3 for goid ) and search term

There is a function called listData to write the list of lines in the file “gene2go.txt” that match the input type and search term, to the file “output.txt”.

Here is a part of my text file "gene2go.txt",

3702    814629  GO:0003676  IEA -   nucleic acid binding    -   Function
3702    814629  GO:0005575  ND  -   cellular_component  -   Component
3702    814629  GO:0005634  ISM -   nucleus -   Component
3702    814629  GO:0008150  ND  -   biological_process  -   Process
3702    814629  GO:0008270  IEA -   zinc ion binding    -   Function
3702    814630  GO:0005634  ISM -   nucleus -   Component
3702    814636  GO:0008150  ND  -   biological_process  -   Process
3702    814637  GO:0003674  ND  -   molecular_function  -   Function
6239    177883  GO:0008150  ND  -   biological_process  -   Process
6239    177884  GO:0005575  ND  -   cellular_component  -   Component
6239    177884  GO:0008150  ND  -   biological_process  -   Process
6239    177886  GO:0004364  IDA -   glutathione transferase activity    12757851    Function
6239    177886  GO:0005575  ND  -   cellular_component  -   Component
7955    555450  GO:0005634  IEA -   nucleus -   Component
7955    555450  GO:0006355  IEA -   regulation of transcription, DNA-dependent  -   Process

I have written the code below named ceng301.c and compiled it using the command line

gcc ceng301.c -o ceng301

, but when I write

ceng301 1 3702

in the command line, I get nothing, but a blinking underscore :( instead of

3702    814629  GO:0003676  IEA -   nucleic acid binding    -   Function
3702    814629  GO:0005575  ND  -   cellular_component  -   Component
3702    814629  GO:0005634  ISM -   nucleus -   Component
3702    814629  GO:0008150  ND  -   biological_process  -   Process
3702    814629  GO:0008270  IEA -   zinc ion binding    -   Function
3702    814630  GO:0005634  ISM -   nucleus -   Component
3702    814636  GO:0008150  ND  -   biological_process  -   Process
3702    814637  GO:0003674  ND  -   molecular_function  -   Function

saved in output.txt

Here is the code,

#include <stdio.h>
#include <stdlib.h>

/* structure definition */
typedef struct {
    int taxid;
    int geneid;
    char goid[20];
    char evidence[4];
    char qualifier[20];
    char goterm[50];
    char *pubmed;
    char category[20];
} gene2go;

/* function prototypes */
int readInfo( gene2go input[] );
void listData( char *inType, char *searchItem, gene2go input[], int i );

int main( int argc, char *argv[] )
{
   gene2go input[200];
   int i;

   i = readInfo( input );
   listData( argv[1], argv[2], input, i );

   return 0;
}

/* read the input file*/
int readInfo( gene2go input[] ) {
   FILE *fin;
   char *inputName = "gene2go.txt";
   int i = 0;

   fin = fopen( inputName, "r" );

   if( fin == NULL ) {
      printf( "File cannot be opened\n" );
   } /* end if */
   else {
      while( !feof( fin ) ) {
        fscanf( fin, "%[^\t]", &input[i].taxid,
                               &input[i].geneid,
                               &input[i].goid,
                               &input[i].evidence,
                               &input[i].qualifier,
                               &input[i].goterm,
                               &input[i].pubmed,
                               &input[i].category );
        i++;
     } /* end while */

     fclose( fin );
  } /* end else */

  return i;
} /* end function readInfo */

void listData( char *inType, char* searchItem, gene2go input[], int i ) {
   FILE *fout;
   char *outputName = "output.txt";
   int j;
   int inputType = atoi( inType );

   fout = fopen( outputName, "w" );

   switch( inputType ) {
      case 1:
          for( j = 0; j < i; j++ ) {
               if( input[j].taxid == atoi( searchItem ) ) {
                   fprintf( fout, "%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n", input[i].taxid,
                                                                      input[i].geneid,
                                                                      input[i].goid,
                                                                      input[i].evidence,
                                                                      input[i].qualifier,
                                                                      input[i].goterm,
                                                                      input[i].pubmed,
                                                                      input[i].category );
               } /* end if */
          } /* end for */
          break;
     case 2:
          for( j = 0; j < i; j++ ) {
              if( input[j].geneid == atoi( searchItem ) ) {
                  fprintf( fout, "%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n", input[i].taxid,
                                                                     input[i].geneid,
                                                                     input[i].goid,
                                                                     input[i].evidence,
                                                                     input[i].qualifier,
                                                                     input[i].goterm,
                                                                     input[i].pubmed,
                                                                     input[i].category );
              } /* end if */
          } /* end for */
          break;
     case 3:
          for( j = 0; j < i; j++ ) {
              if( input[j].goid == searchItem ) {
                  fprintf( fout, "%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\n", input[i].taxid,
                                                                     input[i].geneid,
                                                                     input[i].goid,
                                                                     input[i].evidence,
                                                                     input[i].qualifier,
                                                                     input[i].goterm,
                                                                     input[i].pubmed,
                                                                     input[i].category );
              } /* end if */
          } /* end for */
          break;
 } /* end switch */

 fclose( fout );
} /* end function listData */

What should I do?

Answer

Anirudh Ramanathan picture Anirudh Ramanathan · Oct 24, 2012
char mystring[100];
FILE *p = fopen ("gene2go.txt" , "r");
if (p == NULL) perror ("Error opening file");
   else {
     if ( fgets (mystring , 100 , pFile) != NULL )
       puts (mystring);
      pch = strtok (mystring, "\t");
      while (pch != NULL)
      {
          //handle each token here and insert into struct
          pch = strtok (NULL, "\t");
      }
     fclose (pFile);
   }
   return 0;

Refer to strtok, fgets