# Source code for topo.gpu.projection

import numpy as np
import param

from topo.sparse.sparsecf import SparseCFProjection, SparseConnectionField

# GPU support is optional: if PyCUDA / scikits.cuda (or a CUDA device) is
# unavailable, leave these names undefined so the rest of the package can
# still be imported; any attempt to actually use this module will then fail
# loudly with a NameError instead of at import time.
try:
    import pycuda.gpuarray as gpuarray
    from pycuda.elementwise import ElementwiseKernel
    import pycuda.driver as cuda
    import pycuda.autoinit                   # pyflakes:ignore (API import)
    import scikits.cuda.cusparse as cusparse

    cusparse.init()
except Exception:
    # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt are
    # not swallowed during import; everything else (ImportError, CUDA init
    # failures) is deliberately ignored as "no GPU available".
    pass


def CFPOF_DivisiveNormalizeL1_Sparse_GPU(projection):
    """
    Divisive normalisation computed on the GPU.

    If the per-row weight totals were not already cached as a side effect
    of learning, they are recomputed here as a sparse matrix-vector
    product against a vector of ones.
    """
    # Recompute the per-row weight sums only when learning has not cached them.
    if not projection.has_norm_total:
        projection.weights_gpu.mv(projection.norm_ones_gpu,
                                  y=projection.norm_total_gpu,
                                  autosync=False)

    # Invert the totals once so the kernel can multiply instead of divide.
    projection.norm_total_gpu = 1.0 / projection.norm_total_gpu

    # Scale every stored nonzero weight by the inverse total of its row.
    all_nonzeros = slice(0, projection.nzcount, 1)
    projection.normalize_kernel(projection.nzrows_gpu,
                                projection.norm_total_gpu,
                                projection.weights_gpu.Val,
                                range=all_nonzeros)

    # The cached totals were inverted in place and are now stale.
    projection.has_norm_total = False
def CFPLF_Hebbian_Sparse_GPU(projection):
    """
    Sparse CF Projection learning function applying Hebbian learning
    to the weights in a projection, executed on the GPU.
    """
    # Learning rate shared out equally over every connection of a unit.
    lr_per_connection = projection.learning_rate / projection.n_units

    # Ship the flattened pre- and post-synaptic activities to the device.
    src_act_gpu = gpuarray.to_gpu_async(
        np.ravel(projection.src.activity).astype(np.float32))
    dest_act_gpu = gpuarray.to_gpu_async(
        np.ravel(projection.dest.activity).astype(np.float32))

    # w_i += lr * src[col_i] * dest[row_i] for every stored nonzero.
    projection.hebbian_kernel(lr_per_connection,
                              projection.nzrows_gpu,
                              projection.nzcols_gpu,
                              src_act_gpu,
                              dest_act_gpu,
                              projection.weights_gpu.Val,
                              range=slice(0, projection.nzcount, 1))

    # Cache fresh per-row weight totals for the normalisation output fn.
    projection.weights_gpu.mv(projection.norm_ones_gpu,
                              y=projection.norm_total_gpu,
                              autosync=False)
    projection.has_norm_total = True
def CFPRF_DotProduct_Sparse_GPU(projection):
    """
    Sparse CF Projection response function calculating the dot-product
    between incoming activities and CF weights. Uses GPU.
    """
    # Stage the flattened input in page-locked memory for a fast async copy.
    flat_input = np.ravel(projection.input_buffer).astype(np.float32)
    projection.input_buffer_pagelocked[:] = flat_input
    projection.input_buffer_gpu = gpuarray.to_gpu_async(
        projection.input_buffer_pagelocked,
        stream=projection.pycuda_stream)

    # activity = strength * (W . input), queued on the projection's stream.
    projection.weights_gpu.mv(projection.input_buffer_gpu,
                              alpha=projection.strength,
                              y=projection.activity_gpu_buffer,
                              autosync=False,
                              stream=projection.pycuda_stream)

    # Copy the result straight back into the page-locked activity array.
    projection.activity_gpu_buffer.get_async(ary=projection.activity,
                                             stream=projection.pycuda_stream)
class GPUSparseCFProjection(SparseCFProjection):
    """
    A projection composed of SparseConnectionFields from a Sheet into
    a ProjectionSheet, calculated using a GPU.

    Any subclass has to implement the interface activate(self) that
    computes the response from the input and stores it in the activity
    array.
    """

    cf_type = param.Parameter(default=SparseConnectionField, doc="""
        Type of ConnectionField to use when creating individual CFs.""")

    learning_fn = param.Callable(default=CFPLF_Hebbian_Sparse_GPU, doc="""
        Function for computing changes to the weights based on one activation step.""")

    response_fn = param.Callable(default=CFPRF_DotProduct_Sparse_GPU, doc="""
        Function for computing the Projection response to an input pattern.""")

    weights_output_fns = param.HookList(default=[CFPOF_DivisiveNormalizeL1_Sparse_GPU], doc="""
        Functions applied to each CF after learning.""")

    initialized = param.Boolean(default=False)

    def __init__(self, **params):
        # Hack-ish way to avoid applying the output fns until the weights
        # have actually been transferred to the GPU; the flag is restored
        # (and the delayed normalisation run) at the end of __init__.
        should_apply = self.apply_output_fns_init
        params['apply_output_fns_init'] = False
        super(GPUSparseCFProjection, self).__init__(**params)

        self._init_gpu_weights()
        self._init_gpu_buffers()
        self._init_gpu_kernels()
        self.pycuda_stream = cuda.Stream()

        # Finishing the initialisation that might have been delayed:
        self.apply_output_fns_init = should_apply
        if self.apply_output_fns_init:
            self.apply_learn_output_fns()

    def _init_gpu_weights(self):
        """Upload the sparse weight matrix to the GPU in COO and CSR form."""
        # COO triplets (used for Hebbian learning and normalisation), sorted
        # row-major so coo2csr below sees monotonically increasing rows.
        # np.lexsort replaces the previous sorted(zip(...)) over Python
        # tuples: same ordering for the unique (row, col) positions of a
        # sparse matrix, but vectorized.
        nzcols, nzrows, values = self.weights.getTriplets()
        order = np.lexsort((nzcols, nzrows))   # sort by row, then column
        nzrows = np.asarray(nzrows, np.int32)[order]
        nzcols = np.asarray(nzcols, np.int32)[order]
        values = np.asarray(values, np.float32)[order]

        # Getting them on the GPU:
        self.nzcount = self.weights.getnnz()
        self.nzrows_gpu = gpuarray.to_gpu(nzrows)
        self.nzcols_gpu = gpuarray.to_gpu(nzcols)

        # CSR view of the same matrix, used for the dot-product response.
        # NOTE(review): the CSR matrix is built with the reversed shape
        # (shape[1], shape[0]) and rows counted by shape[1] -- presumably the
        # weights are stored transposed; confirm against SparseCFProjection.
        rowPtr = cusparse.coo2csr(self.nzrows_gpu, self.weights.shape[1])
        descrA = cusparse.cusparseCreateMatDescr()
        cusparse.cusparseSetMatType(descrA, cusparse.CUSPARSE_MATRIX_TYPE_GENERAL)
        cusparse.cusparseSetMatIndexBase(descrA, cusparse.CUSPARSE_INDEX_BASE_ZERO)
        self.weights_gpu = cusparse.CSR(descrA, values, rowPtr, self.nzcols_gpu,
                                        (self.weights.shape[1], self.weights.shape[0]))

    def _init_gpu_buffers(self):
        """Allocate page-locked host memory and device-side work buffers."""
        n_rows = self.weights_gpu.shape[0]
        n_cols = self.weights_gpu.shape[1]

        # Page-locked activity array lets the GPU transfer results back to
        # main memory without the involvement of the CPU.
        self.activity = cuda.pagelocked_empty(self.activity.shape, np.float32)
        self.activity_gpu_buffer = gpuarray.zeros(shape=(n_rows,), dtype=np.float32)

        # Write-combined staging area: fast for host-to-device transfers.
        self.input_buffer_pagelocked = cuda.pagelocked_empty(
            shape=(n_cols,), dtype=np.float32,
            mem_flags=cuda.host_alloc_flags.WRITECOMBINED)
        self.input_buffer = gpuarray.zeros(shape=(n_cols,), dtype=np.float32)

        self.norm_total_gpu = gpuarray.zeros(shape=(n_rows,), dtype=np.float32)
        # Multiplying the weight matrix by this ones-vector yields the
        # per-row weight sums used for divisive normalisation.
        self.norm_ones_gpu = gpuarray.to_gpu(np.ones(n_cols, np.float32))

    def _init_gpu_kernels(self):
        """Compile the elementwise CUDA kernels for normalisation/learning."""
        # Kernel that applies the normalisation:
        self.normalize_kernel = ElementwiseKernel(
            "int *nzrows, float *norm_total, float *weights",
            "weights[i] *= norm_total[nzrows[i]]",
            "divisive_normalize")
        # Kernel that calculates the learning:
        self.hebbian_kernel = ElementwiseKernel(
            "float single_conn_lr, int *row, int *col, float *src_activity, float *dest_activity, float *result",
            "result[i] += single_conn_lr * src_activity[col[i]] * dest_activity[row[i]]",
            "hebbian_learning")