cat

Collapse
This topic is closed.
X
X
 
  • Time
  • Show
Clear All
new posts
  • user923005

    #16
    Re: cat

    On Mar 6, 12:35 pm, user923005 <dcor...@connx.com> wrote:
    On Mar 6, 12:30 pm, user923005 <dcor...@connx.com> wrote:
    >
    >
    >
    >
    >
    On Mar 5, 6:06 pm, Jag <talon....@gmail.com> wrote:
    >
    I've read parts of K&R's ANSI C v2 and this is what their cat looked
    like but when I compared the speed of this code to gnu cat, it seems
    very slow. How do I optimize this for greater speeds? is there an
    alternative algorithm?
    >
    void catfile(FILE *in, FILE *out) {
        register int num_char;
    >
        /*Get characters*/
        while ((num_char = getc(in)) != EOF) {
            /*Print to standard output*/
            putc(num_char, out);
        }
    >
    }
    >
    C:\tmp>dir dict.sql
     Volume in drive C has no label.
     Volume Serial Number is 0890-87CA
    >
     Directory of C:\tmp
    >
    03/01/2007  11:48 AM         7,127,408 dict.sql
                   1 File(s)      7,127,408 bytes
                   0 Dir(s)   5,202,309,120 bytes free
    >
    C:\tmp>cat dict.sql dict.out
    standard cat took 1.984000 seconds
    big buffer cat took 0.000000 seconds
    >
    C:\tmp>type cat.c
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    void            catfilebuffer(F ILE * in, FILE * out)
    {
        register int    num_char;
    >
        setvbuf(in, NULL, _IOFBF, 1024 * 16);
        setvbuf(out, NULL, _IOFBF, 1024 * 16);
    >
        /* Get characters */
        while ((num_char = getc(in)) != EOF) {
            /* Print to standard output */
            putc(num_char, out);
        }
    >
    }
    >
    void            catfilenobuff(F ILE * in, FILE * out)
    {
        register int    num_char;
    >
        /* Get characters */
        while ((num_char = getc(in)) != EOF) {
            /* Print to standard output */
            putc(num_char, out);
        }
    >
    }
    >
    int             main(int argc, char **argv)
    {
        FILE           *in = stdin;
        FILE           *out = stdout;
        clock_t         start,
                        end;
        static const double  cps = 1.0 / CLOCKS_PER_SEC;
        if (argc 1) {
            in = fopen(argv[1], "r");
            if (in == NULL) {
                printf("Error opening %s\n", argv[1]);
                exit(EXIT_FAILU RE);
            }
        }
        if (argc 2) {
            out = fopen(argv[2], "w");
            if (out == NULL) {
                printf("Error opening %s\n", argv[2]);
                exit(EXIT_FAILU RE);
            }
        }
        start = clock();
        catfilenobuff(i n, out);
        end = clock();
        printf("standar d cat took %f seconds\n", (end - start) * cps);
        start = clock();
        catfilebuffer(i n, out);
        end = clock();
        printf("big buffer cat took %f seconds\n", (end - start) * cps);
        fflush(NULL);
        return 0;
    >
    }- Hide quoted text -
    >
    - Show quoted text -
    >
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>
    void            catfilebuffer(F ILE * in, FILE * out)
    {
        register int    num_char;
    >
        setvbuf(in, NULL, _IOFBF, 1024 * 16);
        setvbuf(out, NULL, _IOFBF, 1024 * 16);
    >
        /* Get characters */
        while ((num_char = getc(in)) != EOF) {
            /* Print to standard output */
            putc(num_char, out);
        }
    >
    }
    >
    void            catfilenobuff(F ILE * in, FILE * out)
    {
        register int    num_char;
    >
        /* Get characters */
        while ((num_char = getc(in)) != EOF) {
            /* Print to standard output */
            putc(num_char, out);
        }
    >
    }
    >
    int             main(int argc, char **argv)
    {
        FILE           *in = stdin;
        FILE           *out = stdout;
        clock_t         start,
                        end;
        static const double cps = 1.0 / CLOCKS_PER_SEC;
        if (argc 1) {
            in = fopen(argv[1], "r");
            if (in == NULL) {
                printf("Error opening %s\n", argv[1]);
                exit(EXIT_FAILU RE);
            }
        }
        if (argc 2) {
            out = fopen(argv[2], "w");
            if (out == NULL) {
                printf("Error opening %s\n", argv[2]);
                exit(EXIT_FAILU RE);
            }
        }
        start = clock();
        catfilenobuff(i n, out);
        end = clock();
        printf("standar d cat took %f seconds\n", (end - start) * cps);
        rewind(in);
        fclose(out);
        if (argc 2) {
            out = fopen(argv[2], "w");
            if (out == NULL) {
                printf("Error opening %s\n", argv[2]);
                exit(EXIT_FAILU RE);
            }
        } else
            out = stdout;
        start = clock();
        catfilebuffer(i n, out);
        end = clock();
        printf("big buffer cat took %f seconds\n", (end - start) * cps);
        fflush(NULL);
        return 0;}
    >
    /*
    Not nearly so dramatic! ;-)
    C:\tmp>cat dict.sql dict.out
    standard cat took 1.968000 seconds
    big buffer cat took 1.891000 seconds
    */- Hide quoted text -
    >
    - Show quoted text -
    The real problem here is that catfilebufferfgets() needs to be made
    much more robust. But it does lend a lot of speed (probably due to
    the much lower number of function calls and tests).

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    /* Line buffer used by catfilebufferfgets(); each fgets() call stores at
       most sizeof string - 1 characters plus the terminating '\0'. */
    static char string[1024 * 16];

    /*
     * Copies `in` to `out` with fgets()/fputs() after requesting 16 KiB,
     * fully buffered streams from stdio.
     *
     * Lines longer than the buffer are not a problem: fgets() hands them
     * back in consecutive pieces and each piece is written in turn.  The
     * real limitation is that this is text-only: fputs() stops at the first
     * '\0', so input containing NUL bytes is not reproduced faithfully.
     *
     * The C standard allows setvbuf() only before any other operation on a
     * stream, so pass freshly opened streams.  Its return value is ignored.
     * Copying stops at end-of-file, on a read error, or on the first failed
     * write.
     */
    void catfilebufferfgets(FILE * in, FILE * out)
    {
        setvbuf(in, NULL, _IOFBF, 1024 * 16);
        setvbuf(out, NULL, _IOFBF, 1024 * 16);

        while (fgets(string, sizeof string, in) != NULL) {
            if (fputs(string, out) == EOF) {
                break;      /* write failed; no point reading further */
            }
        }
    }

    /*
     * Copies `in` to `out` one character at a time with getc()/putc(),
     * after requesting 16 KiB, fully buffered streams from stdio.
     *
     * The C standard allows setvbuf() only before any other operation on a
     * stream, so pass freshly opened streams.  Its return value is ignored.
     * Copying ends when getc() returns EOF (end-of-file or read error).
     */
    void catfilebuffer(FILE * in, FILE * out)
    {
        register int num_char;

        setvbuf(in, NULL, _IOFBF, 1024 * 16);
        setvbuf(out, NULL, _IOFBF, 1024 * 16);

        /* num_char is an int so that EOF stays distinct from every byte value */
        while ((num_char = getc(in)) != EOF) {
            putc(num_char, out);
        }
    }

    /*
     * Copies `in` to `out` one character at a time with getc()/putc(),
     * leaving both streams with whatever buffering they already have.
     * Copying ends when getc() returns EOF (end-of-file or read error).
     */
    void catfilenobuff(FILE * in, FILE * out)
    {
        register int num_char;

        /* num_char is an int so that EOF stays distinct from every byte value */
        while ((num_char = getc(in)) != EOF) {
            putc(num_char, out);
        }
    }

    /* Reports a failed fopen() of `path` and terminates the program. */
    static void die_open(const char *path)
    {
        printf("Error opening %s\n", path);
        exit(EXIT_FAILURE);
    }

    /*
     * Gets both streams ready for the next timing pass.
     *
     * A named file is closed and reopened rather than rewound, because the
     * buffered copy routines call setvbuf(), which the C standard allows
     * only before any other operation on a stream.  Reopening the output
     * with "w" also truncates what the previous pass wrote.
     *
     * stdin and stdout cannot be reopened by name: stdin is rewound, and
     * stdout is flushed and left open, since closing it and then writing to
     * it again is undefined.
     */
    static void restart_streams(FILE **in, FILE **out, int argc, char **argv)
    {
        if (argc > 1) {
            fclose(*in);
            *in = fopen(argv[1], "r");
            if (*in == NULL) {
                die_open(argv[1]);
            }
        } else {
            rewind(*in);
        }

        if (argc > 2) {
            fclose(*out);
            *out = fopen(argv[2], "w");
            if (*out == NULL) {
                die_open(argv[2]);
            }
        } else {
            fflush(*out);
        }
    }

    /*
     * Times three ways of copying the input to the output and prints the
     * elapsed clock() time of each pass in seconds.
     *
     * Usage: cat [infile [outfile]]
     *   infile  defaults to stdin
     *   outfile defaults to stdout
     *
     * Files are opened in text mode ("r"/"w").  The timing lines go to
     * stdout, so they are mixed into the copied data when no outfile is
     * given.  Exits with EXIT_FAILURE if a named file cannot be opened,
     * otherwise returns 0.
     */
    int main(int argc, char **argv)
    {
        static const double cps = 1.0 / CLOCKS_PER_SEC;
        FILE *in = stdin;
        FILE *out = stdout;
        clock_t start;
        clock_t end;

        if (argc > 1) {
            in = fopen(argv[1], "r");
            if (in == NULL) {
                die_open(argv[1]);
            }
        }
        if (argc > 2) {
            out = fopen(argv[2], "w");
            if (out == NULL) {
                die_open(argv[2]);
            }
        }

        /* Pass 1: getc/putc with default buffering. */
        start = clock();
        catfilenobuff(in, out);
        end = clock();
        printf("standard cat took %f seconds\n", (end - start) * cps);

        /* Pass 2: getc/putc with 16 KiB stdio buffers. */
        restart_streams(&in, &out, argc, argv);
        start = clock();
        catfilebuffer(in, out);
        end = clock();
        printf("big buffer cat took %f seconds\n", (end - start) * cps);

        /* Pass 3: fgets/fputs with 16 KiB stdio buffers. */
        restart_streams(&in, &out, argc, argv);
        start = clock();
        catfilebufferfgets(in, out);
        end = clock();
        printf("big buffer cat using fgets took %f seconds\n",
               (end - start) * cps);

        fflush(NULL);
        return 0;
    }
    /*
    C:\tmp>cat dict.sql dict.out
    standard cat took 2.062000 seconds
    big buffer cat took 2.016000 seconds
    big buffer cat using fgets took 0.203000 seconds
    */

    Comment

    • user923005

      #17
      Re: cat

      On Mar 6, 12:44 pm, user923005 <dcor...@connx.com> wrote:
      [snip]
      #include <stdio.h>
      #include <stdlib.h>
      #include <time.h>
      >
      static char     string[1024 * 16];
      >
      void            catfilebufferfg ets(FILE * in, FILE * out)
      {
          setvbuf(in, NULL, _IOFBF, 1024 * 16);
          setvbuf(out, NULL, _IOFBF, 1024 * 16);
      >
          /* Get characters (ERROR PRONE: {what if string 16K}) */
          while (fgets(string, sizeof string, in)) {
              fputs(string, out);
          }
      >
      }
      [snip]
      /*
      C:\tmp>cat dict.sql dict.out
      standard cat took 2.062000 seconds
      big buffer cat took 2.016000 seconds
      big buffer cat using fgets took 0.203000 seconds
      */

      Another important difference is that the fgets() version only works on
      text files (for obvious reasons).

      Comment

      • CBFalconer

        #18
        Re: cat

        SM Ryan wrote:
        >
        .... snip bad quote marks ...
        >
        The system cat exploits features of the specific system that
        are not available in ANSI C. For example on unix, you can
        avoid stdio altogether, and do something like
        read -shared buffer -write
        You can largely do that in ANSI C, with streams. Just use getc()
        and putc(). This is why these routines can be implemented as
        macros. The time cost of transferring a buffer content must be
        negligible compared with the cost of reading/writing disk (or
        other) files.

        --
        [mail]: Chuck F (cbfalconer at maineline dot net)
        [page]: <http://cbfalconer.home.att.net>
        Try the download section.



        --
        Posted via a free Usenet account from http://www.teranews.com

        Comment

        • Jag

          #19
          Re: cat

          On Mar 6, 8:49 pm, user923005 <dcor...@connx.com> wrote:
          On Mar 6, 12:44 pm, user923005 <dcor...@connx.com> wrote:
          [snip]
          >
          >
          >
          #include <stdio.h>
          #include <stdlib.h>
          #include <time.h>
          >
          static char string[1024 * 16];
          >
          void catfilebufferfg ets(FILE * in, FILE * out)
          {
          setvbuf(in, NULL, _IOFBF, 1024 * 16);
          setvbuf(out, NULL, _IOFBF, 1024 * 16);
          >
          /* Get characters (ERROR PRONE: {what if string 16K}) */
          while (fgets(string, sizeof string, in)) {
          fputs(string, out);
          }
          >
          }
          [snip]
          /*
          C:\tmp>cat dict.sql dict.out
          standard cat took 2.062000 seconds
          big buffer cat took 2.016000 seconds
          big buffer cat using fgets took 0.203000 seconds
          */
          >
          Another important difference is that the fgets() version only works on
          text files (for obvious reasons).
          This code, like cat -n and cat -e, outputs the line number or adds a $
          at the end of the line. getline is a GNU extension. I haven't used
          setvbuf before. What does it do? Anyway, without setvbuf() it
          took 2.580000 seconds, but with setvbuf() it took
          1.230000 seconds. Thanks for the tip :)

          /*
           * Copies `in` to `out` line by line in the manner of `cat -n` / `cat -e`.
           *
           *   bLine   prefix each line with its number, formatted "%6d ".
           *   bEnds   mark each line end with '$' (used only when bLine is false).
           *
           * With both flags false nothing is written, but lines are still counted.
           *
           * nCounter is the number given to the first line read; the return value
           * is nCounter plus the number of lines read, i.e. the number for the next
           * line, so numbering can continue across several files.
           *
           * Both streams are switched to full buffering of BUFSIZ bytes first.  The
           * C standard allows setvbuf() only before any other operation on a
           * stream, so pass freshly opened streams; its return value is ignored.
           *
           * Output goes through "%s", so a line is written only up to its first
           * '\0' byte.
           */
          int catline(FILE *in, FILE *out, int nCounter,
                      bool const bLine, bool const bEnds) {

              char *pLine = NULL;
              size_t pLen = 0;
              ssize_t nRead;

              setvbuf(in, NULL, _IOFBF, BUFSIZ);
              setvbuf(out, NULL, _IOFBF, BUFSIZ);

              /* Loop on getline()'s own result: it returns -1 both at end-of-file
                 and on a read error, whereas testing feof() alone never ends the
                 loop after an error. */
              while ((nRead = getline(&pLine, &pLen, in)) >= 0) {
                  if (bLine) {
                      fprintf(out, "%6d %s", nCounter, pLine);
                  } else if (bEnds) {
                      /* Use the length getline() reported rather than strlen(),
                         so a line that starts with '\0' cannot index pLine[-1]. */
                      if (nRead > 0 && pLine[nRead - 1] == '\n') {
                          pLine[nRead - 1] = '\0';
                          fprintf(out, "%s$\n", pLine);
                      } else {
                          /* Final line without a newline: add '$' but no '\n'. */
                          fprintf(out, "%s$", pLine);
                      }
                  }

                  ++nCounter;
              }

              /* Release the buffer getline() allocated. */
              free(pLine);

              return nCounter;
          }

          Comment

          • Jag

            #20
            Re: cat

            On Mar 6, 8:49 pm, user923005 <dcor...@connx.com> wrote:
            On Mar 6, 12:44 pm, user923005 <dcor...@connx.com> wrote:
            [snip]
            >
            >
            >
            #include <stdio.h>
            #include <stdlib.h>
            #include <time.h>
            >
            static char string[1024 * 16];
            >
            void catfilebufferfg ets(FILE * in, FILE * out)
            {
            setvbuf(in, NULL, _IOFBF, 1024 * 16);
            setvbuf(out, NULL, _IOFBF, 1024 * 16);
            >
            /* Get characters (ERROR PRONE: {what if string 16K}) */
            while (fgets(string, sizeof string, in)) {
            fputs(string, out);
            }
            >
            }
            [snip]
            /*
            C:\tmp>cat dict.sql dict.out
            standard cat took 2.062000 seconds
            big buffer cat took 2.016000 seconds
            big buffer cat using fgets took 0.203000 seconds
            */
            >
            Another important difference is that the fgets() version only works on
            text files (for obvious reasons).
            Oh, I also used setvbuf() in my original post and yielded: no
            setvbuf() 2.440000, with setvbuf(), 0.920000.
            WOW.

            /*
             * Byte-for-byte stream copy from `in` to `out`.
             *
             * Both streams are first asked for full buffering with a BUFSIZ-byte
             * buffer allocated by the library; the result of that request is not
             * checked.  Characters are then moved one at a time until getc()
             * reports EOF.
             */
            void catfile(FILE *in, FILE *out) {
                int ch;

                setvbuf(in, NULL, _IOFBF, BUFSIZ);
                setvbuf(out, NULL, _IOFBF, BUFSIZ);

                for (;;) {
                    ch = getc(in);
                    if (ch == EOF) {
                        break;
                    }
                    putc(ch, out);
                }
            }

            Comment

            • Falcon Kirtaran

              #21
              Re: cat

              Jag wrote:
              I've read parts of K&R's ANSI C v2 and this is what their cat looked
              like but when I compared the speed of this code to gnu cat, it seems
              very slow. How do I optimize this for greater speeds? is there an
              alternative algorithm?
              >
              void catfile(FILE *in, FILE *out) {
              register int num_char;
              >
              /*Get characters*/
              while ((num_char = getc(in)) != EOF) {
              /*Print to standard output*/
              putc(num_char, out);
              }
              }
              >
              Thanks.
              It's fairly inefficient to get characters one by one. If you felt like
              using system calls to do it, you could use read(), but then you couldn't
              use FILE *. However, the only thing you really need to do is increase
              your buffer size (from one), and thus you could use fgets(). 4096
              bytes, if I recall, is a fairly standard block size; it's the most
              efficient to read full blocks at a time.

              Remember to initialise your variables!

              #include <string.h>
              #include <stdio.h>
              #include <stdlib.h>

              /*
               * Copies everything readable from `in` to `out` in 4096-byte blocks.
               *
               * fread()/fwrite() carry explicit byte counts, so data containing '\0'
               * bytes (an image, for instance) is copied intact; an fgets() plus
               * fprintf("%s") pair would drop everything after the first NUL in each
               * chunk.  Open the streams in binary mode ("rb"/"wb") on platforms that
               * distinguish text from binary.
               *
               * The loop ends at end-of-file, on a read error, or on the first short
               * write; callers can tell these apart with ferror() on either stream.
               * The block buffer lives on the stack, so there is no allocation that
               * could fail.
               */
              void catfile(FILE * in, FILE * out) {
                  char buf[4096];
                  size_t got;

                  while ((got = fread(buf, 1, sizeof buf, in)) > 0) {
                      if (fwrite(buf, 1, got, out) != got) {
                          break;      /* short write: stop instead of losing data silently */
                      }
                  }
              }

              --
              --Falcon Kirtaran

              Comment

              • Richard Heathfield

                #22
                Re: cat

                Falcon Kirtaran said:

                <snip>
                >
                It's fairly inefficient to get characters one by one. [...]
                However, the only thing you really need to do is increase
                your buffer size (from one), and thus you could use fgets().
                Typically, input is buffered by default, so it doesn't actually make any
                difference.

                I compiled your function (after translating it into C90), removed the
                spurious semicolon, and added a main. I then used it to cat a 4MB JPEG.

                real 0m1.484s
                user 0m0.010s
                sys 0m0.210s

                Compare:

                /*
                 * Plain character-at-a-time copy of `in` to `out`, relying on the
                 * buffering the streams already have.  Stops when getc() returns EOF.
                 */
                void catfile(FILE * in, FILE * out) {
                    int c;

                    for (c = getc(in); c != EOF; c = getc(in)) {
                        putc(c, out);
                    }
                }

                real 0m0.633s
                user 0m0.540s
                sys 0m0.070s

                --
                Richard Heathfield <http://www.cpax.org.uk>
                Email: -http://www. +rjh@
                Google users: <http://www.cpax.org.uk/prg/writings/googly.php>
                "Usenet is a strange place" - dmr 29 July 1999

                Comment

                • Herbert Rosenau

                  #23
                  Re: Braces or not [Re: cat]

                  On Thu, 6 Mar 2008 08:38:09 UTC, Micah Cowan <micah@micah.cowan.name>
                  wrote:
                  Richard Heathfield <rjh@see.sig.invalid> writes:
                  >
                  If that's what you mean, then my answer is:
                  - It's not appreciably harder to add braces later than it is to put
                  them in in the first place.
                  Agreed. BUT - it is appreciably harder to remember to add them later on
                  special occasions than to put them in every time as a matter of habit.
                  >
                  Hm. I haven't found it to be so.
                  >
                  while (c)
                  c=do_it(c);
                  c=do_another_thing(c);
                  >
                  looks too broken right away for me not to notice it (though, perhaps
                  now that I'm doing more Python coding work these days, that may
                  change?).
                  >
                  I used to actually always put the braces in. I've fallen out of that
                  practice, just because I find it slightly more readable without, for
                  one-line bodies.
                  >
                  Uh, a halfway intelligent editor will help in writing/editing source.

                  So my editor is set up expanding 'while' to

                  while (_) {
                  }

                  setting the cursor at the position represented by the underline
                  character. Leaving the condition with TAB will insert an empty line,
                  placing the cursor in the new line directly under the 'l' from while,
                  so the new indent is done, ready to type. Enter will insert a new line,
                  holding the same indent. Shift Enter in insert mode will insert a new
                  line under the closing bracket and the cursor under it.

                  The equivalent is provided for do, for, and so on, magically. So conditional
                  blocks are magically written, and indenting is done automatically.

                  The behavior of Enter, TAB and opening brace characters changes
                  depending on the insert|override mode; Enter, Shift Enter, Ctrl Enter
                  and Alt Enter have different modes too. So typing a new program gets
                  easy, and editing it does too.

                  So leaving a block off when typing is at least harder than having
                  it already. Indentation is set magically, so a misleading indent is
                  harder to produce than a correct one.

                  --
                  Tschau/Bye
                  Herbert

                  Visit http://www.ecomstation.de the home of german eComStation
                  eComStation 1.2R Deutsch ist da!

                  Comment

                  Working...