huffman encoder

Collapse
This topic is closed.
X
X
 
  • Time
  • Show
Clear All
new posts
  • aarklon@gmail.com

    huffman encoder

    Hi all,

    This is a program which I saw in my colleague's book. The program was
    run on the Turbo C++ 3.0 compiler.



    /*beginning of header file huff.h*/
    #ifndef _HUFF_H_
    #define _HUFF_H_

    #include <conio.h>
    #include <io.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* One node of the Huffman tree. The 256 leaves live in leaf[]; internal
       nodes are allocated by createtree(). */
    typedef struct node
    {
        unsigned char c;                /* byte value; createtree() sets it to '\0' on internal nodes */
        unsigned long int freq;         /* occurrence count, or the sum of both children's counts */
        struct node *up, *left, *right; /* parent and children; NULL when absent */
    } sn;

    /* One frequency-table entry; main() writes 256 of these at the start of
       the compressed file. */
    typedef struct ftable {
        unsigned long freq; /* occurrence count of one byte value */
    } sft;

    /* Global state shared by all encoder functions. */
    int buf[50],bc; /* bit buffer (one bit per int) and the number of bits it currently holds */
    sft ft; /* scratch entry main() fills when writing the frequency table */
    sn leaf[256]; /* one leaf node per possible byte value */
    sn *a,*t[256]; /* a: scratch node pointer; t: node pointers reordered by sortnode() */

    /*function prototypes*/

    int initialize(char *);
    /* operation: initialization function
    post conditions: initializes all leaves to null values,barring the
    character whose
    frequency they point to.sets frequency values of the leaves
    */
    int sortnode(int);
    /* operation: sorting function
    post conditions: sorts the pointer nodes in the decreasing order of
    frequency*/

    int getnodecount();
    /* operation: get count of nodes with non zero frequency
    post conditions: */

    int createtree();
    /* operation: creating the tree
    post conditions: creates a non optimal huffman tree
    to generate prefix codes
    */

    int comparenode(sn* ,sn*);
    /* operation: node comparison
    post conditions: returns 0 if both pointers point to same data
    else returns -1
    */

    void addtobuffer(int );
    /* operation: initializing the bit buffer
    post conditions: holds prefix codes for each leave nodes
    */

    void refreshbuffer(FILE *);
    /* operation: writing the coded prefix character; every complete group
    of 8 buffered bits is written to the stream as one byte
    post conditions: fewer than 8 bits remain in the bit buffer
    */


    /*void freetree(sn*);*/
    char* getfilename(char *);
    /* operation: obtaining the file name
    post conditions: splits the file path to obtain the file name
    (name plus extension, without drive or directory)
    */

    unsigned char getoddchar();
    /* operation: same as that of refreshbuffer
    pre conditions: bit buffer with less than 8 bits
    */

    #endif

    /*end of header file*/

    /*
    static version of huffman coding.
    it is by no means optimal
    compresses files above 3.25 kb
    upper limit has not been determined upto 50K OK
    usage of global variables
    imperfect coding
    usage of non standard functions lack of garbage collector leaves much
    to be desired it might eat your memory if u are running this program on
    old dos machines
    */

    #include"huff.h "
    /*
     * Encoder entry point.
     * Prompts for the input and output file names, builds the Huffman tree
     * from the input's byte frequencies, then writes the compressed file:
     *   256 frequency-table entries, one byte holding the stored name length
     *   (including its terminating NUL), the file name itself, the packed code
     *   bits, one final zero-padded byte, and one byte holding the number of
     *   valid bits in that final byte.
     * Returns 0 on success and -1 on any failure.
     * clrscr(), filelength() and fileno() are non-standard functions.
     */
    int main(void)
    {
        FILE *fp, *fq;
        int ch, i;
        char fname[100], efile[100];
        unsigned long int fsize, efsize;

        clrscr(); /*non standard fn*/
        printf("\nHuffman encoder");
        printf("\nEnter the name of the input file(to be compressed):");
        fflush(stdout); /* the prompt does not end in '\n' */
        if (fgets(fname, sizeof fname, stdin) == NULL)
        {
            printf("\nError Could not read the input file name..");
            return -1;
        }
        /* remove the newline only if fgets() actually stored one */
        fname[strcspn(fname, "\n")] = 0;

        printf("\nEnter the output filename(compressed file):");
        fflush(stdout);
        if (fgets(efile, sizeof efile, stdin) == NULL)
        {
            printf("\nError Could not read the output file name..");
            return -1;
        }
        efile[strcspn(efile, "\n")] = 0;

        if (initialize(fname) == -1)
        {
            printf("\nError Could not open input file..");
            return -1;
        }
        printf("\nInitialization over.\nPreparing to compress...");
        if (createtree() == -1)
        {
            printf("\nMemory allocation error..");
            return -1;
        }

        fq = fopen(efile, "wb");
        if (!fq)
        {
            /* fq is NULL here, so there is nothing to close */
            printf("\nError Could not open output file..");
            return -1;
        }
        fp = fopen(fname, "rb");
        if (!fp)
        {
            printf("\nError Could not open input file...");
            fclose(fq); /* release the stream that did open */
            return -1;
        }
        fsize = filelength(fileno(fp)); /*non std fn*/

        /****To write the decoding table */
        for (i = 0; i < 256; i++)
        {
            ft.freq = leaf[i].freq;
            fwrite(&ft.freq, sizeof(struct ftable), 1, fq);
        }
        /*To write the character that denotes the size of filenamelength*/
        fputc((int)(strlen(getfilename(fname)) + 1), fq);
        /*To write the filename*/
        fwrite(getfilename(fname), strlen(getfilename(fname)) + 1, 1, fq);
        /***Completed writing of decoding table*****/

        printf("\nCompressing...");
        while ((ch = fgetc(fp)) != EOF)
        {
            addtobuffer(ch);
            refreshbuffer(fq);
        }
        fclose(fp); /* done with the input; the handle is reused below */

        /* flush the last partial byte, then record how many of its bits are valid */
        fputc(getoddchar(), fq);
        fputc(bc, fq);
        if (fclose(fq) == EOF)
        {
            printf("\nError Could not finish writing the output file..");
            return -1;
        }
        printf("\nCompression complete.");

        /*****For display of compression summary****/
        fp = fopen(efile, "rb");
        if (!fp)
        {
            printf("\nCould not open output file for analysis");
            printf("\nCompression summary cannot be displayed");
            return -1;
        }
        efsize = filelength(fileno(fp));
        fclose(fp);

        printf("\n\nCompression summary\n");
        printf("\nInput filesize :%lu bytes", fsize);
        printf("\nOutput filesize:%lu", efsize);
        if (fsize != 0) /* no ratio for an empty input */
        {
            printf("\nCompressed to %Lf%% of original size",
                   ((long double)efsize * 100 / fsize));
        }
        return 0;
    }


    /*
     * Sorts the node pointers t[0]..t[z] into decreasing order of frequency.
     * z is the last index to include, not the element count.
     * The exchange sequence is left exactly as written: the resulting order of
     * equal frequencies is what createtree() builds the tree from.
     * Always returns 0.
     */
    int sortnode(int z)
    {
        int j, k;
        sn *b;

        for (k = 0; k <= z; k++)
        {
            for (j = k + 1; j <= z; j++)
            {
                if (t[k]->freq < t[j]->freq)
                {
                    b = t[k];
                    t[k] = t[j];
                    t[j] = b;
                }
            }
        }
        return 0;
    }

    /*
     * Returns the file name (name plus extension) of filepath.
     * The result points into a static buffer, so it stays valid after the call
     * but is overwritten by the next call. The original returned the address
     * of an automatic array, which no longer exists once the function returns.
     * fnsplit() is a non-standard function.
     * NOTE(review): the buffer sizes assume fnsplit() limits the name and
     * extension to DOS 8.3 lengths - confirm against the library documentation.
     */
    char *getfilename(char *filepath)
    {
        char drive[4], dir[67], ext[5];
        static char file[15]; /* static: the caller receives a pointer to it */

        fnsplit(filepath, drive, dir, file, ext); //non standard fn
        strcat(file, ext);
        return file;
    }
    /*
     * Packs the leftover bits of the bit buffer into one byte.
     * Positions bc..7 of buf are first set to 0, then buf[0..7] are combined
     * with buf[0] as the most significant bit. bc itself is not changed.
     */
    unsigned char getoddchar()
    {
        int i;
        int value = 0;

        for (i = bc; i < 8; i++)
        {
            buf[i] = 0;
        }
        for (i = 0; i < 8; i++)
        {
            value = value * 2 + buf[i];
        }
        return (unsigned char)value;
    }

    void refreshbuffer(F ILE *p)
    {
    int i;
    unsigned char q;
    while(bc>=8)
    {

    q=(1*buf[7])+(2*buf[6])+(4*buf[5])+(8*buf[4])+(16*buf[3])+(32*buf[2])+(64*buf[1])+(128*buf[0]);
    if(fputc(q,p)!= (unsigned)q || q<0 || q>255)printf("\ nError");
    for(i=8;i<bc;i+ +)
    {buf[i-8]=buf[i];}
    bc-=8;
    }
    }

    void addtobuffer(int r)
    {
    int i,buftv[15];
    int bct = -1,buft[15];

    if(r>255 || r<0)
    {
    printf("\nValue error...");
    getch();
    }
    a = &leaf[r];

    while((a->up)!=NULL)
    {
    /* temp = a;*/
    if(comparenode( (a->up->left),a)==0)
    {buft[++bct]=0;}
    else if(comparenode( (a->up->right),a)==0 )
    { buft[++bct]=1;}
    else
    {printf("\nPare nt Error"); /*For debugging*/}

    a=a->up;
    }

    for(i=0;i<=bct; i++)
    { buftv[bct-i]=buft[i];}

    for(i=0;i<=bct; i++)
    {buf[bc+i]=buftv[i];}

    bc += bct+1;
    return;
    }
    /*
     * Builds the code tree from the leaves referenced by t[].
     * After an initial full sort, getnodecount() gives the index of the last
     * node with a non-zero frequency. Each pass re-sorts t[0..i], joins the two
     * lowest-frequency nodes t[i-1] and t[i] under a new parent and stores
     * that parent in t[i-1].
     * With fewer than two used byte values the loop does not run and no parent
     * is created.
     * Returns 0 on success, -1 if a node cannot be allocated (nodes allocated
     * before the failure are not released).
     */
    int createtree()
    {
        int i;

        sortnode(255);
        for (i = getnodecount(); i > 0; i--)
        {
            sortnode(i);
            a = malloc(sizeof *a);
            if (!a)
            {
                printf("\nMemory allocation error...");
                printf("\npress any key to continue...");
                getch();
                return -1; /*Memory allocation error*/
            }
            /*Assigning values*/
            a->freq = t[i]->freq + t[i - 1]->freq;
            a->right = t[i];
            a->left = t[i - 1];
            a->up = NULL;
            a->c = '\0';
            t[i]->up = a;
            t[i - 1]->up = a;
            t[i - 1] = a;
        }
        return 0;
    }
    /*
     * Resets all 256 leaves, counts how often each byte value occurs in the
     * named file, points t[i] at leaf[i] and empties the bit buffer.
     * Returns 0 on success, -1 if the file cannot be opened (t[] and bc are
     * left untouched in that case).
     */
    int initialize(char *filename)
    {
        int i, j;
        FILE *fp;

        for (i = 0; i < 256; i++)
        {
            leaf[i].c = (unsigned char)i;
            leaf[i].freq = 0;
            leaf[i].up = NULL;
            leaf[i].left = NULL;
            leaf[i].right = NULL;
        }

        fp = fopen(filename, "rb");
        if (!fp)
        {
            return -1; /*Could not open file */
        }

        /* fgetc() yields an unsigned char value or EOF, so j always indexes
           leaf[] safely; the old range check ran only after the write */
        while ((j = fgetc(fp)) != EOF)
        {
            leaf[j].freq++;
        }
        fclose(fp);

        for (i = 0; i < 256; i++)
        {
            t[i] = &leaf[i];
        }
        bc = 0;
        return 0;
    }

    /*
     * Returns the number of leaves with a non-zero frequency, minus one.
     * createtree() uses the value as the index of the last used node in the
     * sorted t[]. The result is -1 when no byte value occurs at all.
     */
    int getnodecount()
    {
        int i, used = 0;

        for (i = 0; i < 256; i++)
        {
            if (leaf[i].freq != 0)
            {
                used++;
            }
        }
        return used - 1;
    }

    /*
     * Compares two nodes field by field (c, freq, up, left, right).
     * Returns 0 when every field matches, -1 at the first difference.
     */
    int comparenode(sn *a,sn *b)
    {
        if (a->c != b->c)
            return -1;
        if (a->freq != b->freq)
            return -1;
        if (a->up != b->up)
            return -1;
        if (a->left != b->left)
            return -1;
        if (a->right != b->right)
            return -1;
        return 0;
    }

    /* void freetree(sn* hd)
    {
    if(!hd)
    return;
    freetree(hd -> left);
    freetree(hd -> right);
    free(hd);
    }*/

    now my questions are


    1) how can the function freetree be implemented properly.


    2) can anybody explain the refreshbuffer function
    i mean refresh buffer function writes the encoded bit pattern
    using
    fputc function.

    the function of fputc function is as follows


    int fputc(int ch, FILE *stream);

    Writes a character (an unsigned char) specified by the argument ch
    to the specified stream and advances the position indicator for the
    stream.On success the character is returned. If an error occurs,
    the error indicator for the stream is set and EOF is returned.

    now my question is how compression is achieved,if we are writing ints

    3) what exactly is the purpose served by these two statements in this
    program???

    fputc(getoddcha r(),fq);
    fputc(bc,fq);

  • Walter Roberson

    #2
    Re: huffman encoder

    In article <1135134046.269733.321500@g49g2000cwa.googlegroups.com>,
    <aarklon@gmail.com> wrote:[color=blue]
    > this is the program which I saw in my colleagues book. the program was
    >run on turbo C++ 3.0 compiler[/color]
    [color=blue]
    >/*beginning of header file huff.h*/
    > #ifndef _HUFF_H_
    > #define _HUFF_H_[/color]
    [color=blue]
    > #include <io.h>
    > #include <conio.h>[/color]

    That's a DOS/ Windows program, not standard C, so for bug analysis
    you should be visiting a dos/windows programming newsgroup .

    [color=blue]
    > 2) can anybody explain refreshbuffer funcion
    > i mean refresh buffer function writes the encoded bit pattern
    >using
    > fputc function.[/color]
    [color=blue]
    > the function of fputc function is as follows[/color]
    [color=blue]
    > int fputc(int ch, FILE *stream);[/color]
    [color=blue]
    > now my question is how compression is achieved,if we are writing ints[/color]

    That's an algorithm question, rather than a question about C.

    Let me give a short example:

    Suppose you have the input "abcdefghijXYZabcdefghijPQR".
    This could be written out as,
    1 char with the high bit set to indicate an "escape" code,
    and with the second-highest bit clear to indicate that this is
    an escape of type "literal string",
    and with the lower bits set to decimal 9 to indicate that
    the literal string that follows is of length 10 (you never
    have length 0 so don't waste a count)
    10 characters that are abcdefghij
    3 characters that are XYZ (high bit must be clear on each)
    1 char with high bit set to indicate escape, second highest set
    to indicate "back reference", and the lower bits set to 0 to indicate
    that the reference is to the escaped string that occurred most recently
    3 characters that are PQR (high bit must be clear on each)

    The total length of this representation is 1+10+3+1+3 = 18
    whereas the original string took 26 characters.

    Does this make it clearer as to how sometimes writing out binary
    data can result in compression? The output binary can be interpreted
    to -mean- something, and although the overhead required to encode
    the data the -first- time might be larger than the original data,
    if that bit of data repeats a number of times, if the encoding of
    the reference is short and the data occurs many times, you use the
    short representation each time, and it doesn't take long before the
    overhead of the original encoding is more than made up for.

    Anyhow, I suggest that rather than pursuing this particular program,
    you read the comp.compression FAQ.
    --
    If you lie to the compiler, it will get its revenge. -- Henry Spencer

    Comment

    • Martin Ambuhl

      #3
      Re: huffman encoder

      aarklon@gmail.com wrote 392 lines, most of which is snipped:[color=blue]
      > Hi all,
      >
      > this is the program which I saw in my colleagues book.[/color]

      Please burn that book.
      [color=blue]
      > the program was
      > run on turbo C++ 3.0 compiler
      >
      > /*beginning of header file huff.h*/
      > #ifndef _HUFF_H_
      > #define _HUFF_H_[/color]

      The above illustrates a very bad idea, using identifiers that begin with
      an underscore. Beginning identifiers with an underscore followed by an
      uppercase letter is even worse. Avoid such things unless you *know*
      when such things do not invade the namespace reserved to the
      implementation. "HUFF_H" would have done as well.[color=blue]
      >
      > #include <io.h>[/color]

      There is no header <io.h> in standard C.
      [color=blue]
      > #include <conio.h>[/color]

      There is no header <conio.h> in standard C.

      [...]
      Your code also exits in a number of places returning non-standard values
      (not 0, EXIT_SUCCESS, or EXIT_FAILURE).

      In addition, you return addresses of local variables, omit the inclusion
      of <string.h> and <stdlib.h>, omit fflushing of stdout where needed
      after a prompt not terminated with '\n', write 0 where you imagine the
      '\n' to be in buffers filled by fgets, and use a number of
      non-standard functions: clrscr(), filelength(), fileno(), fnsplit(),
      getch(). God knows what other garbage is to be found in this horror.

      Using a magic number (100) instead of FILENAME_MAX is a bad idea.
      [color=blue]
      > now my questions are
      >
      >
      > 1) how can the function freetree be implemented properly.[/color]

      Worry about the fact that the program is written by a 3rd grader who has
      never learned C rather than trying to fix a pile of crap.

      Comment

      • Christopher Benson-Manica

        #4
        Re: huffman encoder

        Martin Ambuhl <mambuhl@earthlink.net> wrote:
        [color=blue]
        > Please burn that book.[/color]

        Isn't that a bit strong? We have no idea what the purported topic of
        the book was or whether OP accurately transcribed what he found there,
        or indeed whether there are errata that deal with the issues you
        highlighted.
        [color=blue]
        > Worry about the fact that the program is written by a 3rd grader who has
        > never learned C rather than trying to fix a pile of crap.[/color]

        I think the quality of the code suggests that it was not, in fact,
        copied verbatim from any text, even a Schildt text.

        --
        Christopher Benson-Manica | I *should* know what I'm talking about - if I
        ataru(at)cybers pace.org | don't, I need to know. Flames welcome.

        Comment

        • aarklon@gmail.com

          #5
          Re: huffman encoder

          Martin ambuhl wrote[color=blue]
          > /*beginning of header file huff.h*/
          > #ifndef _HUFF_H_
          > #define _HUFF_H_[/color]

          The above illustrates a very bad idea, using identifiers that begin
          with
          an underscore. Beginning identifiers with an underscore followed by an
          uppercase letter is even worse. Avoid such things unless you *know*
          when such things do not invade the namespace reserved to the
          implementation. "HUFF_H" would have done as well.


          Reply::

          the following is the explanation given in the text book
          C primer plus 3rd edition by stephen prata page no: 579

          #ifndef directive is commonly used to prevent multiple inclusions of
          a file.
          that is a header file can be set up in the following lines

          /*things.h*/

          #ifndef _THINGS_H_
          #define _THINGS_H_
          /*rest of include file*/
          #endif


          the standard C header files uses the #ifndef technique to avoid
          multiple file inclusions.
          One problem is to make sure that identifier you have been testing
          is not defined elsewhere.the usual solution is to use the file name
          as the identifier, using UPPER CASE, replacing periods with underscores,
          and an underscore(or perhaps two underscores) as a prefix and suffix



          Martin ambuhl wrote
          [color=blue]
          > #include <io.h>[/color]

          There is no header <io.h> in standard C.
          [color=blue]
          > #include <conio.h>[/color]

          There is no header <conio.h> in standard C.

          [...]

          and use a number of non-standard functions: clrscr(), filelength(),
          fileno(), fnsplit(),
          getch(). God knows what other garbage is to be found in this horror.

          You should have noted the point:: the program was run on turbo C++
          3.0 compiler ,and it was designed with that compiler in mindset

          Comment

          • aarklon@gmail.com

            #6
            Re: huffman encoder

            Here is the decoder program


            #ifndef _DHUFF_H_
            #define _DHUFF_H_

            #include<stdio. h>
            #include<stdlib .h>

            /* One node of the Huffman tree. The 256 leaves live in leaf[];
               internal nodes are obtained from allocate() by createtree(). */
            typedef struct node
            {
                unsigned char c;                /* byte value; createtree() sets it to '\0' on internal nodes */
                unsigned long int freq;         /* occurrence count, or the sum of both children's counts */
                struct node *up, *left, *right; /* parent and children; NULL when absent */
            } sn;
            /* One frequency-table entry as read back by retrieveft(). */
            typedef struct ftable {
                unsigned long freq; /* occurrence count of one byte value */
            } sft;

            /* Global state shared by all decoder functions. */
            sft ft; /* scratch entry retrieveft() reads each table record into */
            sn leaf[256]; /* one leaf node per possible byte value */
            sn *t[256]; /* node pointers reordered by sortnode(); refreshbuffer() starts each walk at t[0] */
            int buf[50],bc; /* bit buffer (one bit per int) and the number of bits it currently holds */

            /*Function prototype declarations*/
            int getnodecount();
            /*operation gets the count of the nodes*/
            /*pre condition all nodes are sorted*/
            /*post condition gets the count of nodes with non zero frequency*/

            int sortnode(int);
            /*operation performs sorting operation*/
            /*pre condition all initialization fn are over*/
            /*post condition pointer nodes are being created in decreasing order
            of frequencies*/

            int createtree();
            /*operation creates huffman tree*/
            /*pre condition all initialization fn are over*/
            /*post condition huffman tree is being created for decoding purposes*/

            int retrieveft(char *);
            /*operation retrieves frequency table written by encoding program*/
            /*pre condition all leave nodes should be initialized*/
            /*post condition frequency values of all leaves are initialized*/

            void initialize();
            /*operation performs initialization function*/
            /*pre condition compressed file should be opened*/
            /*post condition character values of all leaves are set,rest set to
            null,pointer array initialized*/

            void addtobuffer(int );
            /*operation stores huffman code in buffer*/
            /*pre condition freq table file name,stored in file should be
            skipped*/
            /*post condition bit representation is stored in the buffer for each
            character read*/

            void refreshbuffer(FILE *);
            /*operation performs decoding operation*/
            /*pre condition bit buffer should be set*/
            /*post condition writes the decoded characters to file*/

            void relinkandfree() ;
            /*operation frees allocated memory*/
            /*pre condition decompressing operation completed*/
            /*post condition returns allocated memory to heap*/

            sn* allocate();
            /*operation allocates memory*/
            /*pre condition no heap fragmentation*/
            /*post condition allocates memory from heap*/

            #endif
            #include "dhuff.h"

            int main(void)
            {
            char filename[100],outfile[100];
            FILE *p,*q;
            int ch,ct;
            long int filelen,count=1 024;
            clrscr();

            printf("\nFile decompressor for files compressed with comp.c");
            printf("\nEnter the filename:");
            fgets(filename, 100,stdin);
            filename[strlen(filename )-1]=0;

            initialize();
            if(retrieveft(f ilename)==-1)
            {
            printf("\nCould not open file");
            return -1;
            }
            createtree();

            p = fopen(filename, "rb");
            fseek(p,1024,SE EK_SET);
            ct = fgetc(p);
            fread(outfile,c t,1,p); /***Filename retrieval finished*/
            fclose(p);
            p = fopen(filename, "rb");

            /***check for user renaming of output file*/
            printf("\nThe specified archive contains a compressed file called
            %s",outfile);

            q = fopen(outfile," wb");
            if(q==NULL && p==NULL)
            {
            printf("\nCould not open one or more files");
            fclose(p);
            fclose(q);
            return -1;
            }

            fseek(p,256*siz eof(struct ftable)+1+ct,SE EK_SET);
            filelen = filelength(file no(p));//non std fn
            count = 1024 + 1 + ct;
            printf("\n\nIni tialization over.\nPreparin g to decompress..");
            //printf("\nDecom pressing....");
            while(ch=fgetc( p),count++,ch!= EOF)
            {

            if(count==(file len-1))
            {
            addtobuffer(ch) ;
            bc -= 8;
            bc += fgetc(p);
            refreshbuffer(q );
            while(bc!=0)
            refreshbuffer(q );
            }
            else
            {
            addtobuffer(ch) ;
            refreshbuffer(q );
            }
            }

            printf("\nDecom pression complete.\n");
            printf("\nCreat ed output file %s ",outfile);
            //relinkandfree() ;
            return 0;
            }

            /*
             * Returns the number of leaves with a non-zero frequency, minus
             * one. createtree() uses the value as the index of the last used
             * node in the sorted t[]. The result is -1 when every frequency
             * is zero.
             */
            int getnodecount()
            {
                int i, used = 0;

                for (i = 0; i < 256; i++)
                {
                    if (leaf[i].freq)
                    {
                        used++;
                    }
                }
                return used - 1;
            }


            /*
             * Sorts the node pointers t[0]..t[z] into decreasing order of
             * frequency. z is the last index to include, not the element count.
             * The exchange sequence is left exactly as written: the resulting
             * order of equal frequencies is what createtree() builds the tree
             * from.
             * Always returns 0.
             */
            int sortnode(int z)
            {
                int j, k;
                sn *b;

                for (k = 0; k <= z; k++)
                {
                    for (j = k + 1; j <= z; j++)
                    {
                        if (t[k]->freq < t[j]->freq)
                        {
                            b = t[k];
                            t[k] = t[j];
                            t[j] = b;
                        }
                    }
                }
                return 0;
            }

            /*
             * Builds the code tree from the leaves referenced by t[].
             * After an initial full sort, getnodecount() gives the index of
             * the last node with a non-zero frequency. Each pass re-sorts
             * t[0..i], joins t[i-1] and t[i] under a new parent from
             * allocate() and stores that parent in t[i-1].
             * Always returns 0; allocate() does not return on failure.
             */
            int createtree()
            {
                int i;
                sn *a;

                sortnode(255);
                for (i = getnodecount(); i > 0; i--)
                {
                    sortnode(i);
                    a = allocate();
                    a->freq = t[i]->freq + t[i - 1]->freq;
                    a->right = t[i];
                    a->left = t[i - 1];
                    a->up = NULL;
                    a->c = '\0';
                    t[i]->up = a;
                    t[i - 1]->up = a;
                    t[i - 1] = a;
                }
                return 0;
            }

            /*
             * Resets all 256 leaves (byte value set, frequency 0, no links)
             * and points t[i] at leaf[i].
             */
            void initialize()
            {
                int i;

                for (i = 0; i < 256; i++)
                {
                    leaf[i].c = (unsigned char)i;
                    leaf[i].freq = 0;
                    leaf[i].up = NULL;
                    leaf[i].left = NULL;
                    leaf[i].right = NULL;
                    t[i] = &leaf[i];
                }
                return;
            }

            int retrieveft(char *filename)
            {
            int i;
            FILE *fp;
            if(!(fp = fopen(filename, "rb")))
            return -1;/*Could not open file */

            for(i=0;i<256;i ++)
            {
            fread(&ft,sizeo f(sft),1,fp);
            leaf[i].c = (unsigned char)i;
            leaf[i].freq = ft.freq;
            leaf[i].up = NULL;
            leaf[i].right = NULL;
            leaf[i].left = NULL;
            }
            fclose(fp);
            return 0;
            }

            void addtobuffer(int c)
            {
            int i = 0,bct =-1;
            int buft[20],buftv[20];

            while(c)
            {
            buft[++bct]=(c%2);
            c/=2;
            }
            for(i=(bct+1);i <8;buft[i]=0,i++);
            for(i=(0);i<8;b uftv[7-i]=buft[i],i++);
            for(i=0;i<8;buf[bc+i]=buftv[i],i++);
            bc+=8;
            }

            void refreshbuffer(F ILE *p)
            {
            sn *a;
            int count=0,j,i;
            a = t[0];

            for(i=0;i<=bc;i ++)
            {
            if(a->left==NULL && a->right==NULL)
            {
            fputc(a->c,p);
            for(j=count;j<b c;j++)
            buf[j-count]=buf[j];

            bc -= count;
            count = 0;
            a = t[0];
            }
            else if(buf[count]==0)
            {
            a = a->left;
            count++;
            }
            else if(buf[count]==1)
            {
            a = a->right;
            count++;
            }
            else
            printf("\nError ");

            }
            return;
            }

            /*
             * Allocates one tree node and returns it; the fields are not
             * initialised. On allocation failure it prints a message, waits
             * for a key press (getch() is non-standard) and ends the program
             * with exit(1), so it never returns NULL.
             */
            sn* allocate()
            {
                sn *p = malloc(sizeof *p);

                if (!p)
                {
                    printf("\nMemory allocation error...");
                    printf("\n press any key to continue....");
                    getch();
                    exit(1);
                }
                return p;
            }

            /* void relinkandfree()
            {
            call getnodecount() then try to free
            int i;
            for(i=0;i<256;i ++)
            {
            t[i] -> up = NULL;
            t[i] -> left = NULL;
            t[i] -> right = NULL;
            free(t[i]);
            }
            }*/

            Comment

            • Christopher Benson-Manica

              #7
              Re: huffman encoder

              aarklon@gmail.c om wrote:
              [color=blue]
              > the following is the explanation given in the text book
              > C primer plus 3rd edition by stephen prata page no: 579[/color]

              Well, it's wrong. Chalk up another purported C book (and author) to
              be wary of.
              [color=blue]
              > [OP quoting from the above text]
              > One problem is to make sure that identifier you have been testing
              > is not defined elsewhere.the usual solution is to use the file name
              > as the identifier,usin g UPPER CASE,replacing periods with under score,
              > and an underscore(or perhaps two underscores) as a prefix and suffix[/color]
              ^^^^^^^^^^^
              Apparently Mr. Prata could benefit from reading Martin's post as well.
              [color=blue]
              > You should have noted the point:: the program was run on turbo C++
              > 3.0 compiler ,and it was designed with that compiler in mindset[/color]

              You should have read the FAQ and welcome messages for this group.





              --
              Christopher Benson-Manica | I *should* know what I'm talking about - if I
              ataru(at)cybers pace.org | don't, I need to know. Flames welcome.

              Comment

              • Flash Gordon

                #8
                Re: huffman encoder

                aarklon@gmail.c om wrote:

                Please follow the advice at http://cfaj.freeshell.org/google/ on how to
                quote properly using Google Groups. The way you have quoted by copying
                and pasting is confusing for people used to the conventional method.
                [color=blue]
                > Martin ambuhl wrote[color=green]
                >> /*beginning of header file huff.h*/
                >> #ifndef _HUFF_H_
                >> #define _HUFF_H_[/color][/color]

                The above makes it look like Martin Ambuhl wrote the header since there
                is no other attribution, when it was actually you in a previous post.
                I'll fix the quoting this time, but a lot of people on seeing such posts
                will decide it just is not worth their efforts.

                aarklon@gmail.c om wrote:[color=blue]
                > Martin ambuhl wrote[color=green]
                >> aarklon@gmail.c om wrote:[color=darkred]
                >>> /*beginning of header file huff.h*/
                >>> #ifndef _HUFF_H_
                >>> #define _HUFF_H_[/color]
                >>
                >> The above illustrates a very bad idea, using identifiers that begin
                >> with
                >> an underscore. Beginning identifiers with an underscore followed by an
                >> uppercase letter is even worse. Avoid such things unless you *know*
                >> when such things do not invade the namespace reserved to the
                >> implementation. "HUFF_H" would have done as well.[/color]
                >
                > Reply::
                >
                > the following is the explanation given in the text book
                > C primer plus 3rd edition by stephen prata page no: 579
                >
                > #ifndef directive is commonly used to prevent multiple inclusions of
                > a file.[/color]

                This is true.
                [color=blue]
                > that is a header file can be set up in the following lines
                >
                > /*things.h*/
                >
                > #ifndef _THINGS_H_
                > #define _THINGS_H_
                > /*rest of include file*/
                > #endif[/color]

                People may do this, but it is definitely and categorically WRONG. All
                identifiers starting with an underscore followed by an upper case letter
                are reserved for the implementation. You should not ever use them unless
                you are using some implementation specific extension and the
                documentation for your implementation EXPLICITLY tells you to use one,
                and then you should only use it as your implementation says and realise
                that the code is completely non-portable.

                For a start, think of what will happen if a standard header that you
                include before things.h defines _THINGS_H_. I'll tell you what happens,
                you end up missing out all the stuff that your things.h header was meant
                to give you. This is just the simplest way it could go wrong, there are
                an infinite number of other ways it could break things for you.
                [color=blue]
                > the standard C header files uses the #ifndef technique to avoid
                > multiple file inclusions.[/color]

                This may be true (and often is), how the standard headers are written is
                entirely specific to each individual implementation. They don't even
                have to be real files!
                [color=blue]
                > One problem is to make sure that identifier you have been testing
                > is not defined elsewhere.the usual solution is to use the file name
                > as the identifier,usin g UPPER CASE,replacing periods with under score,
                > and an underscore(or perhaps two underscores) as a prefix and suffix[/color]

                You use a convention and stick to it. However, use a convention that is
                actually allowed by C. I don't care what your book says, the C STANDARD
                says that names starting with an underscore followed by an upper case
                letter are reserved for the implementation. In fact, because the rules
                for names starting with an underscore are not as simple as they might be
                it is best to avoid ALL names starting with an underscore at all times.
                [color=blue][color=green][color=darkred]
                >>> #include <io.h>[/color]
                >>
                >> There is no header <io.h> in standard C.
                >>[color=darkred]
                >>> #include <conio.h>[/color]
                >>
                >> There is no header <conio.h> in standard C.
                >>
                >> [...]
                >>
                >> and use a number of non-standard functions: clrscr(), filelength(),
                >> fileno(), fnsplit(),
                >> getch(). God knows what other garbage is to be found in this horror.[/color]
                >
                > You should have noted the point: the program was run on the Turbo C++
                > 3.0 compiler, and it was designed with that compiler in mind[/color]

                We only deal with standard C, we don't deal with the extensions of all
                the many systems out there. If you want to deal with stuff specific to
                Turbo C++ 3.0 then discuss it on a Borland or possibly Microsoft
                newsgroup.
                --
                Flash Gordon
                Living in interesting times.
                Although my email address says spam, it is real and I read it.

                Comment

                • Default User

                  #9
                  Re: huffman encoder

                  aarklon@gmail.com wrote:
                  [color=blue]
                  > Martin ambuhl wrote[color=green]
                  > > /*beginning of header file huff.h*/
                  > > #ifndef _HUFF_H_
                  > > #define _HUFF_H_[/color]
                  >
                  > The above illustrates a very bad idea, using identifiers that begin
                  > with
                  > an underscore. Beginning identifiers with an underscore followed by
                  > an uppercase letter is even worse. Avoid such things unless you know
                  > when such things do not invade the namespace reserved to the
                  > implementation. "HUFF_H" would have done as well.
                  >
                  >
                  > Reply::
                  >
                  > the following is the explanation given in the text book[/color]


                  It looks like you are trying to quote (a good thing) using Google (a
                  bad thing). Please see the information in the .sig below for the
                  correct way.


                  Brian

                  --
                  Please quote enough of the previous message for context. To do so from
                  Google, click "show options" and use the Reply shown in the expanded
                  header.

                  Comment

                  • Mark McIntyre

                    #10
                    Re: huffman encoder

                    On 21 Dec 2005 12:10:14 -0800, in comp.lang.c, aarklon@gmail.com
                    wrote:
                    [color=blue]
                    >
                    > the standard C header files uses the #ifndef technique to avoid
                    >multiple file inclusions.[/color]

                    This is true. Furthermore they're allowed to use the leading
                    underscore and capital letter. You're not, because you're not part of
                    the implementation.
                    [color=blue]
                    >You should have noted the point:: the program was run on turbo C++
                    >3.0 compiler ,and it was designed with that compiler in mindset[/color]

                    Interesting but not relevant in CLC, where code is expected to be
                    compiler-independent.

                    ----== Posted via Newsfeeds.Com - Unlimited-Unrestricted-Secure Usenet News==----
                    http://www.newsfeeds.com The #1 Newsgroup Service in the World! 120,000+ Newsgroups
                    ----= East and West-Coast Server Farms - Total Privacy via Encryption =----

                    Comment

                    • Flash Gordon

                      #11
                      Re: huffman encoder

                      Mark L Pappin wrote:[color=blue]
                      > Flash Gordon <spam@flash-gordon.me.uk> writes:[color=green]
                      >> aarklon@gmail.com wrote:[color=darkred]
                      >>> #ifndef _THINGS__H_[/color][/color]
                      >[color=green]
                      >> For a start, think of what will happen if a standard header that you
                      >> include before things.h defines _THINGS_H_.[/color]
                      >
                      > Or even if a suitably-pedantic implementation defines ALL such
                      > identifiers, without your having included any standard or other
                      > headers. It's allowed to do what it likes with those identifiers, and
                      > you are not.[/color]

                      Agreed. Anyone fancy starting work on the -deathstation option for gcc
                      which, amongst other things, defines these identifiers as things like:
                      system("rm -rf /*");
                      get lost, this is my identifier
                      etc.
                      --
                      Flash Gordon
                      Living in interesting times.
                      Although my email address says spam, it is real and I read it.

                      Comment

                      • Martin Vejnar

                        #12
                        Re: huffman encoder

                        Flash Gordon wrote:[color=blue]
                        > aarklon@gmail.com wrote:[color=green]
                        >> that is a header file can be set up in the following lines
                        >>
                        >> /*things.h*/
                        >>
                        >> #ifndef _THINGS__H_
                        >> #define _THINGS_H_
                        >> /*rest of include file*/
                        >> #endif[/color]
                        >
                        >
                        > People may do this, but it is definitely and categorically WRONG. All
                        > identifiers starting with an underscore followed by an upper case letter
                        > are reserved for the implementation. You should not ever use them unless
                        > you are using some implementation specific extension and the
                        > documentation for your implementation EXPLICITLY tells you to use one,
                        > and then you should only use it as your implementation says and realise
                        > that the code is completely non-portable.[/color]

                        I agree that using underscore at the beginning of anything is a bad
                        idea. But I think that the Standard actually neither prohibits nor
                        discourages this.

                        I don't have the latest version of the Standard or I might have
                        interpreted it incorrectly, so it is fairly possible that I'm wrong. If
                        that's the case, please prove me wrong. All quotations of the Standard
                        are from "Committee Draft - August 3, 1998".

                        The Standard clearly distinguishes between `identifier`s and `macro
                        name`s. What you're referring to is actually not an identifier. It's a
                        macro name.

                        [7.1.3 #1]
                        -- All identifiers that begin with an underscore and either an
                        uppercase letter or another underscore are always reserved for any use.
                        -- All identifiers that begin with an underscore are always
                        reserved for use as identifiers with file scope in both the ordinary and
                        tag name spaces.

                        So yes, the Standard indeed marks these *identifiers* as reserved. There
                        is no such clause for macro names except the following:

                        [6.10.8]
                        [#4] None of these macro names(1), nor the identifier defined, shall
                        be the subject of a #define or a #undef preprocessing directive. Any
                        other predefined macro names shall begin with a leading underscore
                        followed by an uppercase letter or a second underscore.

                        (1) Refers to __LINE__, __FILE__, __DATE__, __TIME__, __STDC__,
                        __STDC_VERSION__, __STDC_ISO_10646__, __STDC_IEC_559__,
                        __STDC_IEC_559_COMPLEX__

                        There is nothing said about reservation...

                        Martin.

                        Comment

                        • Keith Thompson

                          #13
                          Re: huffman encoder

                          Martin Vejnar <avakar@volny.cz> writes:
                          [snip][color=blue]
                          > I agree that using underscore at the beginning of anything is a bad
                          > idea. But I think that the Standard actually neither prohibits nor
                          > discourages this.
                          >
                          > I don't have the latest version of the Standard or I might have
                          > interpreted it incorrectly, so it is fairly possible that I'm
                          > wrong. If that's the case, please prove me wrong. All quotations of
                          > the Standard are from "Committee Draft - August 3, 1998".
                          >
                          > The Standard clearly distinguishes between `identifier`s and `macro
                          > name`s. What you're referring to is actually not an identifier. It's a
                          > macro name.[/color]

                          A macro name is an identifier. See the grammar in section 6.10:

                          control-line:

                          # define identifier replacement-list new-line
                          ...
                          [color=blue]
                          > [7.1.3 #1]
                          > -- All identifiers that begin with an underscore and either an
                          > uppercase letter or another underscore are always reserved for any
                          > use.[/color]

                          "Any use" includes use as a macro name.

                          Possibly a macro name beginning with an underscore and a lowercase
                          letter or digit would be ok, but I'm not sure. It's safer just to
                          avoid identifiers with leading underscores.

                          --
                          Keith Thompson (The_Other_Keith) kst-u@mib.org <http://www.ghoti.net/~kst>
                          San Diego Supercomputer Center <*> <http://users.sdsc.edu/~kst>
                          We must do something. This is something. Therefore, we must do this.

                          Comment

                          • Flash Gordon

                            #14
                            Re: huffman encoder

                            Martin Vejnar wrote:[color=blue]
                            > Flash Gordon wrote:[color=green]
                            >> aarklon@gmail.com wrote:[color=darkred]
                            >>> that is a header file can be set up in the following lines
                            >>>
                            >>> /*things.h*/
                            >>>
                            >>> #ifndef _THINGS__H_
                            >>> #define _THINGS_H_
                            >>> /*rest of include file*/
                            >>> #endif[/color]
                            >>
                            >>
                            >> People may do this, but it is definitely and categorically WRONG. All
                            >> identifiers starting with an underscore followed by an upper case
                            >> letter are reserved for the implementation. You should not ever use
                            >> them unless you are using some implementation specific extension and
                            >> the documentation for your implementation EXPLICITLY tells you to use
                            >> one, and then you should only use it as your implementation says and
                            >> realise that the code is completely non-portable.[/color]
                            >
                            > I agree that using underscore at the beginning of anything is a bad
                            > idea. But I think that the Standard actually neither prohibits nor
                            > discourages this.[/color]

                            I believe it does make it undefined behaviour.
                            [color=blue]
                            > I don't have the latest version of the Standard or I might have
                            > interpreted it incorrectly, so it is fairly possible that I'm wrong. If
                            > that's the case, please prove me wrong. All quotations of the Standard
                            > are from "Committee Draft - August 3, 1998".
                            >
                            > The Standard clearly distinguishes between `identifier`s and `macro
                            > name`s. What you're referring to is actually not an identifier. It's a
                            > macro name.
                            >
                            > [7.1.3 #1]
                            > -- All identifiers that begin with an underscore and either an
                            > uppercase letter or another underscore are always reserved for any use.[/color]

                            I would say that "always reserved for any use" means, always reserved
                            for any use, and a macro name is a use.

                            Also, in that section in n1124.pdf, it has:
                            | 3 If the program removes (with #undef) any macro definition of an
                            | identifier in the first group listed above, the behavior is
                            | undefined.

                            Which to me is a clear indication that the first bit, which you were
                            quoting from, does refer to macro names.

                            For something a little more specific, in n1124 we also have:
                            | 6.2 Concepts
                            | 6.2.1 Scopes of identifiers
                            | 1 An identifier can denote an object; a function; a tag or a member of
                            ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^
                            | a structure, union, or enumeration; a typedef name; a label name; a
                            | macro name; or a macro parameter. The same identifier can denote
                              ^^^^^^^^^^
                            | different entities at different points in the program. A member of
                            | an enumeration is called an enumeration constant. Macro names and
                            | macro parameters are not considered further here, because prior to
                            | the semantic phase of program translation any occurrences of macro
                            | names in the source file are replaced by the preprocessing token
                            | sequences that constitute their macro definitions.

                            So that is clearly stating that a macro name is an identifier, so the
                            reserving of identifiers later in the standard clearly includes macro names.

                            6.4.2 Identifiers also refers back to 6.2.1 for what identifiers can
                            designate.
                            [color=blue]
                            > -- All identifiers that begin with an underscore are always
                            > reserved for use as identifiers with file scope in both the ordinary and
                            > tag name spaces.[/color]

                            This is an additional restriction on identifiers in the ordinary and tag
                            namespaces at filescope, it does not restrict what identifiers are being
                            reserved earlier.

                            It is because of the complexity of the rules we generally recommend here
                            to avoid all names starting with an underscore, even the ones you are
                            allowed to use, so you don't make mistakes as you have.
                            [color=blue]
                            > So yes, the Standard indeed marks these *identifiers* as reserved. There
                            > is no such clause for macro names except the following:[/color]

                            Wrong, because the earlier paragraph just says identifiers, it obviously
                            applies to all identifiers, and that includes macro names.
                            [color=blue]
                            > [6.10.8]
                            > [#4] None of these macro names(1), nor the identifier defined, shall be
                            > the subject of a #define or a #undef preprocessing directive. Any other
                            > predefined macro names shall begin with a leading underscore
                            > followed by an uppercase letter or a second underscore.
                            >
                            > (1) Refers to __LINE__, __FILE__, __DATE__, __TIME__, __STDC__,
                            > __STDC_VERSION__, __STDC_ISO_10646__, __STDC_IEC_559__,
                            > __STDC_IEC_559_COMPLEX__
                            >
                            > There is nothing said about reservation...[/color]

                            Not in there, but in the earlier part it does reserve them.
                            --
                            Flash Gordon
                            Living in interesting times.
                            Although my email address says spam, it is real and I read it.

                            Comment

                            • Martin Vejnar

                              #15
                              Re: huffman encoder

                              Flash Gordon wrote:[color=blue]
                              > Martin Vejnar wrote:[color=green]
                              >> The Standard clearly distinguishes between `identifier`s and `macro
                              >> name`s.[/color]
                              >
                              > | 6.2 Concepts
                              > | 6.2.1 Scopes of identifiers
                              > | 1 An identifier can denote an object; a function; a tag or a member of
                              > ^^^^^^^^^^^^^^^ ^^^^^^^^^^^^
                              > | a structure, union, or enumeration; a typedef name; a label name; a
                              > | macro name; or a macro parameter. The same identifier can denote
                              > ^^^^^^^^^^
                              > | different entities at different points in the program. A member of
                              > | an enumeration is called an enumeration constant. Macro names and
                              > | macro parameters are not considered further here, because prior to
                              > | the semantic phase of program translation any occurrences of macro
                              > | names in the source file are replaced by the preprocessing token
                              > | sequences that constitute their macro definitions.
                              >
                              > So that is clearly stating that a macro name is an identifier, so the
                              > reserving of identifiers later in the standard clearly includes macro
                              > names.[/color]

                              You're right, I missed that part. Thanks for clarification.

                              Martin.

                              Comment

                              Working...