large datasets!!

Collapse
X
 
  • Time
  • Show
Clear All
new posts
  • pilafi
    New Member
    • Nov 2007
    • 15

    large datasets!!

    Hello guys! I have a problem and would like your help. I have to write a reader for a platform that reads CSV files. I wrote the reader and it can now read CSV files, but some of the files are very large (the largest one is 300 MB), and in that case my program crashes. Below I include the reading part of my code and a small test file I created to show what the data looks like.

    Reading part:

    [CODE=cpp]#ifdef _MSC_VER
    #pragma warning ( disable : 4786 )
    #endif
    // stdafx.h goes first: with MSVC precompiled headers everything that
    // precedes it is ignored by the compiler.
    #include "stdafx.h"

    #include <ctype.h>
    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #include <fstream>
    #include <iostream>
    #include <string>
    #include <vector>

    #include "itkExceptionObject.h"
    #include "itkIOCommon.h"
    #include "itkCSVImageIO.h"
    #include "itksys/SystemTools.hxx"

    /**
     * Extracts the next decimal number from a space-separated text line.
     *
     * Scanning starts at the character *str currently points to. A token only
     * counts as a number when it contains a digit followed, somewhere later in
     * the same token, by a '.'; tokens without a decimal point (such as the
     * integer row/column indices at the start of each data line) are skipped.
     * Characters other than digits, '.', ' ' and the terminator are ignored, so
     * a leading '-' is dropped and the parsed value is the magnitude.
     *
     * @param str  In/out cursor into a NUL-terminated line. On success it is
     *             left on the delimiter (space or terminator) that ended the
     *             token, so the function can be called again to continue. It is
     *             never advanced past the terminating NUL.
     * @param d    Receives the parsed value on success; untouched otherwise.
     * @return 0 when a number was stored in *d, 1 when the line holds no
     *         further number (or an argument is null).
     */
    int get_next_double (char **str, double *d)
    {
        // Large enough for any realistic sample; longer tokens are truncated
        // instead of overflowing the scratch buffer.
        const size_t kTokenCapacity = 64;
        char token[kTokenCapacity];
        size_t length = 0;
        bool seenDigit = false;
        bool seenPoint = false;

        if (str == 0 || *str == 0 || d == 0)
        {
            return 1;
        }

        for (;;)
        {
            // Look at the current character BEFORE advancing, so the first
            // character of the line is not lost and the terminator is never
            // stepped over.
            const char c = **str;

            if (isdigit(static_cast<unsigned char>(c)))
            {
                seenDigit = true;
                if (length < kTokenCapacity - 1)
                {
                    token[length++] = c;
                }
            }
            else if (c == '.' && seenDigit)
            {
                seenPoint = true;
                if (length < kTokenCapacity - 1)
                {
                    token[length++] = c;
                }
            }
            else if (c == ' ' || c == '\0')
            {
                if (seenDigit && seenPoint)
                {
                    token[length] = '\0';
                    *d = atof(token);
                    return 0;
                }
                if (c == '\0')
                {
                    return 1;
                }
                // The token was not a decimal number: discard it and go on.
                length = 0;
                seenDigit = false;
                seenPoint = false;
            }

            ++(*str);
        }
    }

    /**
     * Counts the decimal numbers on one text line by counting its '.' characters.
     *
     * @param str  Line to inspect; scanning stops at the first '\n' or at the
     *             terminating NUL, whichever comes first. A null pointer counts
     *             as an empty line.
     * @return Number of '.' characters found before the end of the line.
     */
    int count_numbers(const char *str)
    {
        if (str == 0)
        {
            return 0;
        }

        int counter = 0;
        // Stopping at NUL as well as '\n' matters: istream::getline() strips the
        // newline, so a line read that way has no '\n' to stop on.
        for (const char *cursor = str; *cursor != '\0' && *cursor != '\n'; ++cursor)
        {
            if (*cursor == '.')
            {
                ++counter;
            }
        }
        return counter;
    }

    namespace itk
    {

    /**
     * Sets the fixed properties of the reader: three dimensions, one
     * component per pixel, little-endian byte order and ASCII file type.
     */
    CSVImageIO::CSVImageIO()
    {
        this->SetNumberOfDimensions(3);  // CSV is 3D.
        this->SetNumberOfComponents(1);  // CSV only has one component.
        m_ByteOrder = LittleEndian;
        m_FileType = ASCII;
    }

    /** Nothing to release explicitly. */
    CSVImageIO::~CSVImageIO()
    {
    }

    void CSVImageIO::Pri ntSelf(std::ost ream& os, Indent indent) const
    {
    Superclass::Pri ntSelf(os, indent);
    }

    bool CSVImageIO::Can ReadFile( const char* filename )
    {
    std::cout << "CSVImageIO::Ca nReadFile() " << std::endl;
    //
    // If the file exists, and have extension .csv, then we are good to read it.
    //
    if( !itksys::System Tools::FileExis ts( filename ) )
    {
    std::cout << "File doesn't exist" << std::endl;
    return false;
    }
    std::cout << itksys::SystemT ools::GetFilena meLastExtension ( filename ) << std::endl;
    if( itksys::SystemT ools::GetFilena meLastExtension ( filename ) != ".csv" )
    {
    std::cout << "Wrong extension" << std::endl;
    return false;
    }

    return true;
    }


    void CSVImageIO::Rea dImageInformati on()
    {
    char onedataline[300000];
    char headerLine[100];
    _CRT_FLOAT ftemp;
    _CRT_FLOAT ScanningLength;
    _CRT_FLOAT ScanningResolut ion;
    _CRT_FLOAT IndexLength;
    _CRT_FLOAT IndexResolution ;
    char str[10];
    int retval=0;

    // CSV only reads 8-bits unsigned short images.
    this->SetPixelType ( SCALAR );
    this->SetComponentTy pe( USHORT );

    // read our own information
    m_InputStream.o pen(this->m_FileName.c_s tr(), std::ios::in);
    m_InputStream.s eekg(0, std::ios::beg);
    for (int i=0; i<33; i++)
    {
    m_InputStream.g etline(headerLi ne, 100, '\n');
    }
    m_InputStream.g etline(headerLi ne, 100, ':');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    strcpy(str,head erLine);
    retval=_atoflt( &ScanningLength ,str);

    m_InputStream.g etline(headerLi ne, 100, ':');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    strcpy(str,head erLine);
    retval=_atoflt( &ScanningResolu tion,str);

    m_InputStream.g etline(headerLi ne, 100, '\n');

    m_InputStream.g etline(headerLi ne, 100, ':');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    strcpy(str,head erLine);
    retval=_atoflt( &IndexLength,st r);

    m_InputStream.g etline(headerLi ne, 100, ':');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    strcpy(str,head erLine);
    retval=_atoflt( &IndexResolutio n,str);

    m_InputStream.g etline(headerLi ne, 100, '\n');

    m_InputStream.g etline(headerLi ne, 100, '\n');

    m_InputStream.g etline(headerLi ne, 100, ':');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    strcpy(str,head erLine);
    retval=_atoflt( &ftemp,str);

    m_InputStream.g etline(headerLi ne, 100, ':');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    strcpy(str,head erLine);
    retval=_atoflt( &ftemp,str);

    m_InputStream.g etline(headerLi ne, 100, ':');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    strcpy(str,head erLine);
    retval=_atoflt( &ftemp,str);

    m_InputStream.g etline(headerLi ne, 100, '\n');
    m_InputStream.g etline(headerLi ne, 100, '\n');
    m_InputStream.g etline(headerLi ne, 100, '\n');

    //data follows here
    data_position = m_InputStream.t ellg();
    m_InputStream.g etline(onedatal ine, 300000, '\n');


    this->SetOrigin( 0, 0.0 );
    this->SetOrigin( 1, 0.0 );
    this->SetOrigin( 2, 0.0 );

    this->SetDimension s( 0, count_numbers(o nedataline)-1);
    this->SetDimension s( 1, ((ScanningLengt h.f)/(ScanningResolu tion.f)) );
    this->SetDimension s( 2, ((IndexLength.f )/(IndexResolutio n.f)) );

    this->SetSpacing( 0, ((0.0027)*100) );
    this->SetSpacing( 1, ((ScanningResol ution.f)*100) );
    this->SetSpacing( 2, ((IndexResoluti on.f)*100) );

    m_InputStream.c lose();
    }


    void CSVImageIO::Rea d( void * buffer)
    {
    char onedataline[300000];
    char *tmp=0;
    double d=0;
    unsigned short data=0;
    unsigned short * inptr = static_cast< unsigned short * >( buffer );
    unsigned short * tmp_buf=0;

    //copy the pointer to the buffer, because we are going to change the pointer soon
    tmp_buf=inptr;

    //print the data for check
    std::ofstream m_OutputStream( "D:\\MIP\\outda ta.csv");

    //open file
    this->m_InputStream. open(this->m_FileName.c_s tr(), std::ios::in);

    //seek to data position
    m_InputStream.s eekg(data_posit ion);

    //clear temporay buffer that hold a complete line
    //memset(onedatal ine,0,65000);

    //read in one data line
    //m_InputStream.g etline(onedatal ine, 65000, '\n');

    //const unsigned int m = this->GetDimension s( 0 );
    //const unsigned int n = this->GetDimension s( 1 );
    //const unsigned int o = this->GetDimension s( 2 );

    int i=0,j=0,k=0;

    //memset(inptr,0, m*n*o);

    do
    {
    memset(onedatal ine,0,300000);
    m_InputStream.g etline(onedatal ine, 300000, '\n');
    tmp=(char*)oned ataline;
    while(get_next_ double(&tmp,&d) ==0)
    {
    data=(unsigned short)((double) fabs(d)*10000);
    *(tmp_buf++)=da ta;
    i++;
    m_OutputStream << data << " ";
    }

    m_OutputStream <<endl;

    //m_InputStream.g etline(onedatal ine, 65000, '\n');
    }while(!m_Input Stream.eof());

    std::cout << "data read: " << i << std::endl;
    /*
    const unsigned int nx = this->GetDimension s( 0 );
    const unsigned int ny = this->GetDimension s( 1 );
    const unsigned int nz = this->GetDimension s( 2 );

    ImageIORegion regionToRead = this->GetIORegion( );

    ImageIORegion:: SizeType size = regionToRead.Ge tSize();
    ImageIORegion:: IndexType start = regionToRead.Ge tIndex();

    const unsigned int mx = size[0];
    const unsigned int my = size[1];
    const unsigned int mz = size[2];

    const unsigned int sx = start[0];
    const unsigned int sy = start[1];
    const unsigned int sz = start[2];

    unsigned short * inptr = static_cast< unsigned short * >( buffer );

    //seek to data

    //read data
    //store into buffer
    */
    this->m_InputStream. close();
    }


    /**
     * Reports whether this class can write the given file.
     *
     * Always false: WriteImageInformation() and Write() are empty, so
     * advertising write support would make a writer "succeed" without
     * producing any output.
     */
    bool CSVImageIO::CanWriteFile( const char * /* name */ )
    {
        return false;
    }


    /** Placeholder for writing the file header; the body is empty, so nothing is written. */
    void
    CSVImageIO
    ::WriteImageInformation(void)
    {
        // add writing here
    }


    /**
    * Write hook for pixel data. The body is empty, so calling it currently
    * does nothing and `buffer` is ignored.
    */
    void
    CSVImageIO
    ::Write( const void* buffer)
    {
    }

    /** Given a requested region, determine what could be the region that we can
    * read from the file. This is called the streamable region, which will be
    * smaller than the LargestPossibleRegion and greater or equal to the
    * RequestedRegion.
    *
    * Read() always loads the complete data set and ignores the IO region, so
    * the only region that can be delivered is the whole image: index 0 and the
    * full size along every dimension. Returning the requested region unchanged
    * would let a caller size its buffer for a sub-region that Read() then
    * overruns. Both regions are printed to std::cout. */
    ImageIORegion
    CSVImageIO
    ::CalculateStreamableReadRegionFromRequestedRegion( const ImageIORegion & requested ) const
    {
        std::cout << "CSVImageIO::CalculateStreamableReadRegionFromRequestedRegion()" << std::endl;
        std::cout << "Requested region = " << requested << std::endl;

        const unsigned int dimension = this->GetNumberOfDimensions();
        ImageIORegion streamableRegion( dimension );
        for( unsigned int i = 0; i < dimension; ++i )
        {
            streamableRegion.SetIndex( i, 0 );
            streamableRegion.SetSize( i, this->GetDimensions( i ) );
        }

        std::cout << "StreamableRegion = " << streamableRegion << std::endl;

        return streamableRegion;
    }


    } // end namespace itk
    [/CODE]

    And the test data:


    _____________C-Scan Settings__________________________
    Distance Unit: mm
    Scanning Axis: 0
    Scanning Length: 1.000
    Scanning Resolution: 0.2500
    Index Axis: 1
    Index Length: 1.000
    Index Resolution: 0.2500
    _____________Output Waveform Settings____________________
    Data Ch: 1
    Waveform Start(us): 7.182
    Waveform Length(#): 0.000
    Waveform Sampling Rate(MHz): 4000.000

    ______Output Data (Format for each row: index Number, scanning number, Waveform)______

    0 0 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    0 1 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    0 2 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    0 3 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    1 0 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    1 1 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    1 2 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    1 3 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    2 0 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    2 1 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    2 2 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    2 3 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    3 0 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    3 1 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    3 2 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000
    3 3 0.0000 1.0000 0.0000 0.0000 2.0000 0.0000 0.0000 0.0000 3.0000 0.0000 0.0000

    I am looking forward for your answer!!
    Thanx!!
    Last edited by RedSon; Jan 18 '08, 04:12 PM. Reason: CODE!
  • RedSon
    Recognized Expert Expert
    • Jan 2007
    • 4980

    #2
    Please enclose your posted code in [code] tags (See How to Ask a Question).

    This makes it easier for our Experts to read and understand it. Failing to do so creates extra work for the moderators, thus wasting resources, otherwise available to answer the members' questions.

    Please use [code] tags in future.

    MODERATOR

    Comment

    • pilafi
      New Member
      • Nov 2007
      • 15

      #3
      Originally posted by RedSon
      Please enclose your posted code in [code] tags (See How to Ask a Question).

      This makes it easier for our Experts to read and understand it. Failing to do so creates extra work for the moderators, thus wasting resources, otherwise available to answer the members' questions.

      Please use [code] tags in future.

      MODERATOR
      So sorry,i couldnt understand it.I will post again my question.
      Sorry!

      Comment

      • RedSon
        Recognized Expert Expert
        • Jan 2007
        • 4980

        #4
        Originally posted by pilafi
        So sorry,i couldnt understand it.I will post again my question.
        Sorry!
        No don't post again. That is double posting and is a waste of time and energy and will get you one step closer to being banned from this site. Your original post is fine. Just use [CODE] tags in the future.

        -MODERATOR

        Comment

        • pilafi
          New Member
          • Nov 2007
          • 15

          #5
          Ok thnx!!Can anybody help me?I need an advice as soon as possible if its easy.
          Thnx guys!!

          Comment

          • weaknessforcats
            Recognized Expert Expert
            • Mar 2007
            • 9214

            #6
            My first suggestion is to get your arrays off the stack and onto the heap. I would never let the compiler manage the memory.

            Comment

            • pilafi
              New Member
              • Nov 2007
              • 15

              #7
              Originally posted by weaknessforcats
              My first suggestion is to get your arrays off the stack and onto the heap. I would never let the compiler manage the memory.
              Can you tell practically how can i do this??
              Thnx for answering to me!!

              Comment

              • weaknessforcats
                Recognized Expert Expert
                • Mar 2007
                • 9214

                #8
                Just allocate your own memory:

                [code=cpp]
                char onedataline[300000];

                //becomes:

                char* onedataline = new char[300000];
                [/code]

                This should require no code changes anywhere, since the heap array and the stack array use the same syntax. You just need to delete the array (with delete[]) when you are finished with it.

                I would do this will all the local variables.

                Comment

                • pilafi
                  New Member
                  • Nov 2007
                  • 15

                  #9
                  Originally posted by weaknessforcats
                  Just allocate your own memory:

                  [code=cpp]
                  char onedataline[300000];

                  //becomes:

                  char* onedataline = new char[300000];
                  [/code]

                  This should require no code chnages anywhere since the heap array and the stack array use the same syntax. You just need to delete the array when you are finished with it.

                  I would do this will all the local variables.

                  Thank you very much!!I will try it and i will tell you what happened!!

                  Comment

                  • pilafi
                    New Member
                    • Nov 2007
                    • 15

                    #10
                    Originally posted by pilafi
                    Thank you very much!!I will try it and i will tell you what happened!!
                    OK, I followed your advice and now it works for a large 200 MB file. But
                    there is a strange artifact in every image: a white slice. Do you have any idea
                    how this slice was created?
                    Tnx for help!!
                    I am waiting your answer!!

                    Comment

                    • weaknessforcats
                      Recognized Expert Expert
                      • Mar 2007
                      • 9214

                      #11
                      Originally posted by pilafi
                      But
                      there is a strange artifact in every image a white slice.Do you have any idea
                      how this slice created?
                      I'm not sure what you are talking about. As in "strange artifact" and "white slice".

                      If you allocate an array, it is required that a single contiguous allocation be made. The contents of the elements are indeterminate unless you have default constructors.

                      Comment

                      • pilafi
                        New Member
                        • Nov 2007
                        • 15

                        #12
                        Originally posted by weaknessforcats
                        I'm not sure what you are talking about. As in "strange artifact" and "white slice".

                        If you allocate an array, it is required that a single contiguous allocation be made. The contents of the elements are indeterminate unless you have default constuctors.

                        Sorry again, but I would like a clarification. You told me to allocate my
                        own memory, and I did so only for the two places in my code that it referred to:
                        code (cpp):
                        char onedataline[300000];

                        I have to do it somewhere else??

                        Sorry but i am new and i need your help!!

                        Comment

                        • RedSon
                          Recognized Expert Expert
                          • Jan 2007
                          • 4980

                          #13
                          Originally posted by pilafi
                          Sorry again, but I would like a clarification. You told me to allocate my
                          own memory, and I did so only for the two places in my code that it referred to:
                          code (cpp):
                          char onedataline[300000];

                          I have to do it somewhere else??

                          Sorry but i am new and i need your help!!
                          First I think you should explain what "strange artifact" and "white slice" mean.

                          Comment

                          • pilafi
                            New Member
                            • Nov 2007
                            • 15

                            #14
                            Originally posted by RedSon
                            First I think you should explain what "strange artifact" and "white slice" mean.
                            Before I made the change you advised, my code worked for CSV files up to 80 MB. The picture I got on my screen was the image I expected, with 3 layers. Now, after the change, the image I get is totally different: I cannot see the layers; instead I see a white layer and some strange spots. The positive side is that I can now open a 200 MB file, but the result is not correct.

                            I am looking forward for your answer!!
                            Thnx!!

                            Comment

                            • weaknessforcats
                              Recognized Expert Expert
                              • Mar 2007
                              • 9214

                              #15
                              Originally posted by pilafi
                              Before i make this change you advice me my code could work for csv files until 80MB.The picture i had as a result in my screen was an image as i would like to be with 3 layers.Now after i did the change the image i get is totally different.I cant see the layers instead i can see a white layer and some strange spots.But the positive is that i can open a 200MB file, but its not correct.
                              You never answered RedSon's question, or mine: What is this "artifact" and what is this "white slice"??????

                              A csv file is just a file where each record is a line and the values in the line are separated by commas. That's all it is.

                              The actual meaning of the values differs by application. You cannot use a csv file for a picture unless you already know the structure of the file. Just opening any old csv file will get you garbage.

                              It's starting to look like an application bug and not a memory allocation problem.

                              Exactly what are you trying to do?

                              Comment

                              Working...