I'm working with two libraries. One, written
in old-school C, returns a very large
chunk of data in the form of a C-style,
NUL-terminated string.
The other, written in more modern C++,
is a parser for the chunk of bytes returned by
the first. It expects a reference to a
std::istream as its argument.
The chunk of data is very large.
I'd like to feed the output of the first to
the second WITHOUT MAKING AN EXTRA IN-MEMORY COPY.
My attempts to create an istringstream from the
chunk of data all seem to at least double the
amount of VM used. Here's a short program demonstrating
what I've tried. Is there any way to get "inside"
the istringstream and tell it to use the 'chunk'
directly, rather than insisting on making a copy?
Thanks,
John Salmon
[jsalmon@river c++]$ cat chararraytostream.cpp
#include <unistd.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <string>
using namespace std;
// Forward declarations; the definitions follow main() below.
char *getLotsOfBytes();
std::istream& streamParser(std::istream &s);
void linuxChkMem(const char *msg);
void withImplicitStr ing(){
linuxChkMem("Be fore getLotsOfBytes: ");
char *chunk = getLotsOfBytes( );
linuxChkMem("Af ter getLotsOfBytes( ):");
{
istringstream iss(chunk);
linuxChkMem("Af ter iss(p): ");
streamParser(is s);
linuxChkMem("Af ter streamParser(is s): ");
}
linuxChkMem("Af ter iss goes out of scope: ");
free(chunk);
linuxChkMem("Af ter free(p): ");
}
void withExplicitStr ing(){
linuxChkMem("Be fore getLotsOfBytes: ");
char *chunk = getLotsOfBytes( );
linuxChkMem("Af ter getLotsOfBytes( ):");
{
string s(chunk);
linuxChkMem("Af ter s(chunk): ");
free(chunk);
linuxChkMem("Af ter free(p): ");
istringstream iss(s);
linuxChkMem("Af ter iss(s): ");
streamParser(is s);
linuxChkMem("Af ter streamParser(is s): ");
}
linuxChkMem("Af ter iss goes out of scope: ");
}
int main(int argc, char **argv){
printf("with an implicit string constructor\n") ;
withImplicitStr ing();
printf("\nwith an explicit string constructor\n") ;
withExplicitStr ing();
return 0;
}
// On Linux, tell us how much data space we're using in the VM.
// Prints `msg` (no newline added), then runs
// `grep VmData /proc/<pid>/status` for the current process via system().
void linuxChkMem(const char *msg){
    printf("%s", msg);
    // Flush our own text before the child process writes to the same stdout.
    fflush(stdout);
    char cmd[50];
    // snprintf bounds the write to the buffer regardless of the pid's width.
    snprintf(cmd, sizeof cmd, "grep VmData /proc/%d/status",
             static_cast<int>(getpid()));
    system(cmd);
}
// Size in bytes of the buffer handed out by getLotsOfBytes().
static const int SZ = 100*1024*1024;
// A rough approximation to getLotsOfBytes. In the
// real application, getLotsOfBytes has these characteristics:
// - it returns a malloced pointer to a NUL-terminated array of chars.
// - it is out of my control. E.g., I can't rewrite it in a way
//   that might be more friendly to C++ streams.
//
// This stand-in fills an SZ-byte buffer with spaces and places the text
// "3.1415 2.718 1.414" (plus its terminating NUL) 50 bytes before the end.
// The caller releases the buffer with free().
char *getLotsOfBytes(){
    char *p = static_cast<char *>(std::malloc(SZ));
    if (!p) {
        // Without this check the memset below would write through a null
        // pointer; fail loudly instead.
        std::perror("getLotsOfBytes: malloc");
        std::abort();
    }
    std::memset(p, ' ', SZ);
    std::strcpy(p + SZ - 50, "3.1415 2.718 1.414");
    return p;
}
// A rough approximation to streamParser. In the real
// application, streamParser takes a ref to an istream
// and does what it does. Again, I can't easily redefine
// the interface.
//
// This stand-in extracts three whitespace-separated doubles from `s`,
// prints them, and returns `s` so the caller can inspect the stream state.
std::istream& streamParser(std::istream& s){
    // Zero-initialised so a failed extraction never prints indeterminate values.
    double x = 0.0;
    double y = 0.0;
    double z = 0.0;
    s >> x >> y >> z;
    std::printf("x: %f y: %f z: %f\n", x, y, z);
    return s;
}
[jsalmon@river c++]$ g++ -O3 chararraytostream.cpp
[jsalmon@river c++]$ a.out
with an implicit string constructor
Before getLotsOfBytes: VmData: 40 kB
After getLotsOfBytes():VmData: 102444 kB
After iss(p): VmData: 204848 kB
x: 3.141500 y: 2.718000 z: 1.414000
After streamParser(iss): VmData: 204980 kB
After iss goes out of scope: VmData: 102576 kB
After free(p): VmData: 172 kB
with an explicit string constructor
Before getLotsOfBytes: VmData: 172 kB
After getLotsOfBytes():VmData: 102576 kB
After s(chunk): VmData: 204980 kB
After free(p): VmData: 102576 kB
After iss(s): VmData: 204980 kB
x: 3.141500 y: 2.718000 z: 1.414000
After streamParser(iss): VmData: 204980 kB
After iss goes out of scope: VmData: 172 kB
[jsalmon@river c++]$
Comment