Maximum memory allocation problem

Discussion created by vanja_z on Jan 14, 2012
Latest reply on Aug 16, 2013 by himanshu.gautam

There appears to be a bug limiting the maximum memory allocation to around 60% of device memory. According to this knowledge base article:

by default, the memory made available to OpenCL is limited to 50%, and it should be possible to increase this to 100% by setting the environment variable GPU_MAX_HEAP_SIZE to a value between 0 and 100. In my tests this has not worked as expected. The amount of memory reported using CL_DEVICE_GLOBAL_MEM_SIZE does match the expected value; however, the actual amount of memory that can be allocated does not.


I have tested this by allocating and initializing small buffers (20MB) until failure. Regardless of settings, the actual amount of memory available tops out at around 60%. I have included the code used for testing and would be interested to hear whether other people's installations behave similarly. For reference, the Nvidia implementation does not suffer from this problem: using their hardware/driver I can allocate very close to the total device memory (even in a single buffer, I might add).

Here are the results on my machine with specs:

2 x HD6950 2GB
Arch Linux Driver 11.12
SDK 2.6


Global memory size: 1073 MB
Accessed 1080 MB

Global memory size: 1180 MB
Accessed 1180 MB

Global memory size: 1287 MB
Accessed 1260 MB

Global memory size: 1502 MB
Accessed 1260 MB

Global memory size: 2147 MB
Accessed 1260 MB


Looking forward to hearing anyone else's experience or an official response.



/*
 * vzcl_maxalloc.c
 * Copyright 2012 Vanja Zecevic
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <CL/cl.h>

int main (int argc, char * argv[])
int chunk = 10;
int maxmem = 2000;
int iChunk;
int nChunk;
int iX;
int iArg;
int nX;
int nAccessed;

cl_platform_id platform;
cl_device_id device;
cl_context Context;
cl_command_queue CmdQueue;
cl_int err_tr = CL_SUCCESS;
cl_ulong global_size;

cl_mem * buffers_dev;
int ** buffers_host;

/* Get flags.  */
if (argc <=6 ) {
    for (iArg=1; iArg<argc; iArg++) {
        if (!strcmp(argv[iArg],"--help")) {
  "vzcl_maxalloc <flags>\n"
  "prints the ammount of available memory on an OpenCL device\n"
  "reported by clGetDeviceInfo and also the actual ammount able to be\n"
  "vzcl_maxalloc --chunk 10 --maxmem 2000\n"
  "--help   Prints this message\n"
  "--chunk  The size of each chunk to be allocated in MB (default 10 MB)\n"
  "--maxmem The maximum memory to allocate in MB (default 2000 MB)\n"
        else if (!strcmp(argv[iArg],"--chunk"))  chunk  = atoi(argv[iArg+1]); 
        else if (!strcmp(argv[iArg],"--maxmem")) maxmem = atoi(argv[iArg+1]);
nChunk = maxmem/chunk;
nX = (chunk*(int)1e6)/sizeof(int);

/* Initialize OpenCL devices.  */
err_tr = clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
Context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
CmdQueue = clCreateCommandQueue(Context, device, 0, NULL);

clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong),
  &global_size, NULL);
printf("Created OpenCL context.\n"
       "Global memory size: %li MB\n",

/* First allocate buffers.  */
buffers_dev = (cl_mem*)malloc(nChunk*sizeof(cl_mem));
for (iChunk=0; iChunk<nChunk; iChunk++) {
    *(buffers_dev+iChunk) = clCreateBuffer(Context, CL_MEM_READ_WRITE,
      nX*sizeof(int), NULL, &err_tr);
    if (err_tr != CL_SUCCESS) {
        /*printf("error %i\n", err_tr);*/
printf("Allocated %i MB\n", iChunk*nX*sizeof(int)/(int)1e6);
nChunk = iChunk;

/* Now try to access buffers.  */
buffers_host = (int**)malloc(nChunk*sizeof(int*));
for (iChunk=0; iChunk<nChunk; iChunk++) {
    *(buffers_host+iChunk) = (int*)malloc(nX*sizeof(int));
    for (iX=0; iX<nX; iX++) *(*(buffers_host+iChunk)+iX) = 0;
    err_tr = clEnqueueWriteBuffer(CmdQueue, *(buffers_dev+iChunk), CL_TRUE, 0,
      nX*sizeof(int), *(buffers_host+iChunk), 0, NULL, NULL);
    if (err_tr != CL_SUCCESS) {
        /*printf("error %i\n", err_tr);*/
printf("Accessed %i MB\n", iChunk*nX*sizeof(int)/(int)1e6);
nAccessed = iChunk;

for (iChunk=0; iChunk<nAccessed; iChunk++)
for (iChunk=0; iChunk<nChunk; iChunk++)

return 0;


Message was edited by: vanja z (fixed formatting for new forum)