First of all, I apologize for cross-posting. I posted this on the
framework.inter op group as well, but haven't received a response yet, so I
thought I'd try here...
I'm trying to write an app that perfroms some work using the CUDA SDK (for
programming the nVidia GPU).
One of the first methods I wrote is simply to query information about the
CUDA devices (video cards).
The code is very straightforward , but I'm getting an error that indicates
some sort of memory corruption and I'm not really sure how it's happening. I
don't think it's CUDA related, I think it's just something wrong with my
interop calls. Maybe the arrays? The exception I'm getting happens during
the return from GetCUDADevicePr operties (see implementation below and
comment on line that throws exception). It appears to be happening in the
marshalling back of the data. The exception is:
"An unhandled exception of type 'System.AccessV iolationExcepti on' occurred
in NeuralNetTiming Test.exe
Additional information: Attempted to read or write protected memory. This is
often an indication that other memory is corrupt."
When I step through the code, GetCUDADevicePr operties() appears to properly
set the fields in the CUDADevice structure.
Here's the relevant code:
From C#:
* The device information structure is this:
[StructLayout(La youtKind.Sequen tial)]
public struct CUDADevice
{
public int deviceID;
public int totalMem;
public int numMultiProcess ors;
public int numCores;
public int constantMem;
public int sharedMem;
public int registersPerBlo ck;
public int warpSize;
public int numThreadsPerBl ock;
public int[] maxBlockDimensi ons;
public int[] maxGridDimensio ns;
}
* The DllImport is:
[DllImport("Neur alNetCUDALib")]
extern static bool GetCUDADevicePr operties(ref CUDADevice device);
* And the C# code that calls it is:
// Allocate the device structure
CUDADevice dev = new CUDADevice();
// Allocate the two arrays
dev.maxBlockDim ensions = new int[3];
dev.maxGridDime nsions = new int[3];
// Device to query
dev.deviceID = 0;
GetCUDADevicePr operties(ref dev);
* The non-CUDA C++ code is:
* From the .h, the C++ side of the CUDADevice structure
typedef struct tagCUDADevice
{
int deviceID;
int totalMem;
int numMultiProcess ors;
int numCores;
int constantMem;
int sharedMem;
int registersPerBlo ck;
int warpSize;
int numThreadsPerBl ock;
int maxBlockDimensi ons[3];
int maxGridDimensio ns[3];
} CUDADevice;
extern "C" bool GetCUDADevicePr operties(CUDADe vice *pDevice)
{
if (GetDeviceCount () <= 0)
{
return false;
}
if (!QueryDeviceIn fo(pDevice->deviceID,
pDevice->totalMem,
pDevice->numMultiProces sors,
pDevice->numCores,
pDevice->constantMem,
pDevice->sharedMem,
pDevice->registersPerBl ock,
pDevice->warpSize,
pDevice->numThreadsPerB lock,
pDevice->maxBlockDimens ions,
pDevice->maxGridDimensi ons))
{
return false;
}
return true; // Exception happens during this return!
}
* And the actual CUDA code is:
bool QueryDeviceInfo (int deviceNum,
int &totalMem,
int &numMultiProces sors,
int &numCores,
int &constantMem ,
int &sharedMem,
int ®istersPerBl ock,
int &warpSize,
int &numThreadsPerB lock,
int *maxBlockDimens ions,
int *maxGridDimensi ons)
{
cudaDeviceProp prop;
cudaGetDevicePr operties(&prop, deviceNum);
totalMem = (int) prop.totalGloba lMem;
numMultiProcess ors = (int) prop.multiProce ssorCount;
numCores = (int) numMultiProcess ors * 8;
constantMem = (int) prop.totalConst Mem;
sharedMem = (int) prop.sharedMemP erBlock;
registersPerBlo ck = (int) prop.regsPerBlo ck;
warpSize = (int) prop.warpSize;
numThreadsPerBl ock = (int) prop.maxThreads PerBlock;
maxBlockDimensi ons[0] = (int) prop.maxThreads Dim[0];
maxBlockDimensi ons[1] = (int) prop.maxThreads Dim[1];
maxBlockDimensi ons[2] = (int) prop.maxThreads Dim[2];
maxGridDimensio ns[0] = (int) prop.maxGridSiz e[0];
maxGridDimensio ns[1] = (int) prop.maxGridSiz e[1];
maxGridDimensio ns[2] = (int) prop.maxGridSiz e[2];
return true;
}
framework.inter op group as well, but haven't received a response yet, so I
thought I'd try here...
I'm trying to write an app that perfroms some work using the CUDA SDK (for
programming the nVidia GPU).
One of the first methods I wrote is simply to query information about the
CUDA devices (video cards).
The code is very straightforward , but I'm getting an error that indicates
some sort of memory corruption and I'm not really sure how it's happening. I
don't think it's CUDA related, I think it's just something wrong with my
interop calls. Maybe the arrays? The exception I'm getting happens during
the return from GetCUDADevicePr operties (see implementation below and
comment on line that throws exception). It appears to be happening in the
marshalling back of the data. The exception is:
"An unhandled exception of type 'System.AccessV iolationExcepti on' occurred
in NeuralNetTiming Test.exe
Additional information: Attempted to read or write protected memory. This is
often an indication that other memory is corrupt."
When I step through the code, GetCUDADevicePr operties() appears to properly
set the fields in the CUDADevice structure.
Here's the relevant code:
From C#:
* The device information structure is this:
[StructLayout(La youtKind.Sequen tial)]
public struct CUDADevice
{
public int deviceID;
public int totalMem;
public int numMultiProcess ors;
public int numCores;
public int constantMem;
public int sharedMem;
public int registersPerBlo ck;
public int warpSize;
public int numThreadsPerBl ock;
public int[] maxBlockDimensi ons;
public int[] maxGridDimensio ns;
}
* The DllImport is:
[DllImport("Neur alNetCUDALib")]
extern static bool GetCUDADevicePr operties(ref CUDADevice device);
* And the C# code that calls it is:
// Allocate the device structure
CUDADevice dev = new CUDADevice();
// Allocate the two arrays
dev.maxBlockDim ensions = new int[3];
dev.maxGridDime nsions = new int[3];
// Device to query
dev.deviceID = 0;
GetCUDADevicePr operties(ref dev);
* The non-CUDA C++ code is:
* From the .h, the C++ side of the CUDADevice structure
typedef struct tagCUDADevice
{
int deviceID;
int totalMem;
int numMultiProcess ors;
int numCores;
int constantMem;
int sharedMem;
int registersPerBlo ck;
int warpSize;
int numThreadsPerBl ock;
int maxBlockDimensi ons[3];
int maxGridDimensio ns[3];
} CUDADevice;
extern "C" bool GetCUDADevicePr operties(CUDADe vice *pDevice)
{
if (GetDeviceCount () <= 0)
{
return false;
}
if (!QueryDeviceIn fo(pDevice->deviceID,
pDevice->totalMem,
pDevice->numMultiProces sors,
pDevice->numCores,
pDevice->constantMem,
pDevice->sharedMem,
pDevice->registersPerBl ock,
pDevice->warpSize,
pDevice->numThreadsPerB lock,
pDevice->maxBlockDimens ions,
pDevice->maxGridDimensi ons))
{
return false;
}
return true; // Exception happens during this return!
}
* And the actual CUDA code is:
bool QueryDeviceInfo (int deviceNum,
int &totalMem,
int &numMultiProces sors,
int &numCores,
int &constantMem ,
int &sharedMem,
int ®istersPerBl ock,
int &warpSize,
int &numThreadsPerB lock,
int *maxBlockDimens ions,
int *maxGridDimensi ons)
{
cudaDeviceProp prop;
cudaGetDevicePr operties(&prop, deviceNum);
totalMem = (int) prop.totalGloba lMem;
numMultiProcess ors = (int) prop.multiProce ssorCount;
numCores = (int) numMultiProcess ors * 8;
constantMem = (int) prop.totalConst Mem;
sharedMem = (int) prop.sharedMemP erBlock;
registersPerBlo ck = (int) prop.regsPerBlo ck;
warpSize = (int) prop.warpSize;
numThreadsPerBl ock = (int) prop.maxThreads PerBlock;
maxBlockDimensi ons[0] = (int) prop.maxThreads Dim[0];
maxBlockDimensi ons[1] = (int) prop.maxThreads Dim[1];
maxBlockDimensi ons[2] = (int) prop.maxThreads Dim[2];
maxGridDimensio ns[0] = (int) prop.maxGridSiz e[0];
maxGridDimensio ns[1] = (int) prop.maxGridSiz e[1];
maxGridDimensio ns[2] = (int) prop.maxGridSiz e[2];
return true;
}
Comment