diff --git a/TensorMath.lua b/TensorMath.lua index baf640ae..ac3232ad 100644 --- a/TensorMath.lua +++ b/TensorMath.lua @@ -198,69 +198,93 @@ local function wrap(...) method:wrap(unpack(args)) end +local Tensor = "CudaTensor" +local real = "float" + wrap("zero", cname("zero"), - {{name="CudaTensor", returned=true}}) + {{name=Tensor, returned=true}}) wrap("fill", cname("fill"), - {{name="CudaTensor", returned=true}, - {name="float"}}) + {{name=Tensor, returned=true}, + {name=real}}) + +wrap("zeros", + cname("zeros"), + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name="LongArg"}}) + + wrap("ones", + cname("ones"), + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name="LongArg"}}) + + wrap("reshape", + cname("reshape"), + {{name=Tensor, default=true, returned=true}, + {name=Tensor}, + {name="LongArg"}}) + + wrap("numel", + cname("numel"), + {{name=Tensor}, + {name="long", creturned=true}}) wrap("add", cname("add"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", method={default=1}}, - {name="float"}}, + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, method={default=1}}, + {name=real}}, cname("cadd"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", method={default=1}}, - {name="float", default=1}, - {name="CudaTensor"}}) + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, method={default=1}}, + {name=real, default=1}, + {name=Tensor}}) wrap("mul", cname("mul"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", method={default=1}}, - {name="float"}}) + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, method={default=1}}, + {name=real}}) wrap("div", cname("div"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", 
method={default=1}}, - {name="float"}}) + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, method={default=1}}, + {name=real}}) wrap("cmul", cname("cmul"), - {{name="CudaTensor", returned=true, method={default='nil'}}, - {name="CudaTensor", default=1}, - {name="CudaTensor"}}) + {{name=Tensor, returned=true, method={default='nil'}}, + {name=Tensor, default=1}, + {name=Tensor}}) wrap("cdiv", cname("cdiv"), - {{name="CudaTensor", returned=true, method={default='nil'}}, - {name="CudaTensor", default=1}, - {name="CudaTensor"}}) + {{name=Tensor, returned=true, method={default='nil'}}, + {name=Tensor, default=1}, + {name=Tensor}}) wrap("addcmul", cname("addcmul"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", method={default=1}}, - {name="float", default=1}, - {name="CudaTensor"}, - {name="CudaTensor"}}) + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, method={default=1}}, + {name=real, default=1}, + {name=Tensor}, + {name=Tensor}}) wrap("addcdiv", cname("addcdiv"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", method={default=1}}, - {name="float", default=1}, - {name="CudaTensor"}, - {name="CudaTensor"}}) + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, method={default=1}}, + {name=real, default=1}, + {name=Tensor}, + {name=Tensor}}) do - local Tensor = "CudaTensor" - local real = "float" + local Tensor = Tensor + local real = real wrap("mv", cname("addmv"), {{name=Tensor, default=true, returned=true, method={default='nil'}, @@ -373,28 +397,37 @@ end wrap("dot", cname("dot"), - {{name="CudaTensor"}, - {name="CudaTensor"}, - {name="float", creturned=true}}) + {{name=Tensor}, + {name=Tensor}, + {name=real, creturned=true}}) wrap("sum", cname("sumall"), - {{name="CudaTensor"}, - {name="float", creturned=true}}, + {{name=Tensor}, + {name=real, creturned=true}}, 
cname("sum"), - {{name="CudaTensor", default=true, returned=true}, - {name="CudaTensor"}, + {{name=Tensor, default=true, returned=true}, + {name=Tensor}, + {name="index"}}) + +wrap("prod", + cname("prodall"), + {{name=Tensor}, + {name=real, creturned=true}}, + cname("prod"), + {{name=Tensor, default=true, returned=true}, + {name=Tensor}, {name="index"}}) for _,name in ipairs({"min", "max"}) do wrap(name, cname(name .. "all"), - {{name="CudaTensor"}, - {name="float", creturned=true}}, + {{name=Tensor}, + {name=real, creturned=true}}, cname(name), - {{name="CudaTensor", default=true, returned=true}, - {name="CudaTensor", default=true, returned=true}, - {name="CudaTensor"}, + {{name=Tensor, default=true, returned=true}, + {name=Tensor, default=true, returned=true}, + {name=Tensor}, {name="index"}}) end @@ -408,34 +441,34 @@ for _,name in ipairs({"log", "log1p", "exp", wrap(name, cname(name), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", default=1}}) + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, default=1}}) end wrap("pow", cname("pow"), - {{name="CudaTensor", returned=true, method={default='nil'}}, - {name="CudaTensor", default=1}, - {name="float"}}) + {{name=Tensor, returned=true, method={default='nil'}}, + {name=Tensor, default=1}, + {name=real}}) wrap("clamp", cname("clamp"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", default=1}, - {name="float"}, - {name="float"}}) + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, default=1}, + {name=real}, + {name=real}}) for _,name in pairs({'lt','gt','le','ge','eq','ne'}) do wrap(name, cname(name .. 'Value'), - {{name="CudaTensor", default=true, returned=true}, - {name="CudaTensor"}, - {name="float"}}, + {{name=Tensor, default=true, returned=true}, + {name=Tensor}, + {name=real}}, cname(name .. 
'Tensor'), - {{name="CudaTensor", returned=true}, - {name="CudaTensor"}, - {name="CudaTensor"}}) + {{name=Tensor, returned=true}, + {name=Tensor}, + {name=Tensor}}) end for _,f in ipairs({{name='geometric'}, @@ -443,8 +476,8 @@ for _,f in ipairs({{name='geometric'}, wrap(f.name, cname(f.name), - {{name="CudaTensor", returned=true}, - {name="float", default=f.a}}) + {{name=Tensor, returned=true}, + {name=real, default=f.a}}) end for _,f in ipairs({{name='uniform', a=0, b=1}, @@ -454,65 +487,65 @@ for _,f in ipairs({{name='uniform', a=0, b=1}, wrap(f.name, cname(f.name), - {{name="CudaTensor", returned=true}, - {name="float", default=f.a}, - {name="float", default=f.b}}) + {{name=Tensor, returned=true}, + {name=real, default=f.a}, + {name=real, default=f.b}}) end for _,f in ipairs({{name='exponential'}}) do wrap(f.name, cname(f.name), - {{name="CudaTensor", returned=true}, - {name="float", default=f.a}}) + {{name=Tensor, returned=true}, + {name=real, default=f.a}}) end wrap("mean", cname("meanall"), - {{name="CudaTensor"}, - {name="float", creturned=true}}, + {{name=Tensor}, + {name=real, creturned=true}}, cname("mean"), - {{name="CudaTensor", default=true, returned=true}, - {name="CudaTensor"}, + {{name=Tensor, default=true, returned=true}, + {name=Tensor}, {name="index"}}) for _,name in ipairs({"var", "std"}) do wrap(name, cname(name .. 
"all"), - {{name="CudaTensor"}, - {name="float", creturned=true}}) + {{name=Tensor}, + {name=real, creturned=true}}) end wrap("norm", cname("normall"), - {{name="CudaTensor"}, - {name="float", default=2}, - {name="float", creturned=true}}, + {{name=Tensor}, + {name=real, default=2}, + {name=real, creturned=true}}, cname("norm"), - {{name="CudaTensor", default=true, returned=true}, - {name="CudaTensor"}, - {name="float"}, + {{name=Tensor, default=true, returned=true}, + {name=Tensor}, + {name=real}, {name="index"}}) wrap("renorm", cname("renorm"), - {{name="CudaTensor", default=true, returned=true, method={default='nil'}}, - {name="CudaTensor", method={default=1}}, - {name="float"}, + {{name=Tensor, default=true, returned=true, method={default='nil'}}, + {name=Tensor, method={default=1}}, + {name=real}, {name="index"}, - {name="float"}}) + {name=real}}) wrap("dist", cname("dist"), - {{name="CudaTensor"}, - {name="CudaTensor"}, - {name="float", default=2}, - {name="float", creturned=true}}) + {{name=Tensor}, + {name=Tensor}, + {name=real, default=2}, + {name=real, creturned=true}}) wrap("squeeze", cname("squeeze"), - {{name="CudaTensor", default=true, returned=true, postcall=function(arg) + {{name=Tensor, default=true, returned=true, postcall=function(arg) local txt = {} if arg.returned then table.insert(txt, string.format('if(arg%d->nDimension == 1 && arg%d->size[0] == 1)', arg.i, arg.i)) -- number @@ -520,9 +553,9 @@ wrap("squeeze", end return table.concat(txt, '\n') end}, - {name="CudaTensor"}}, + {name=Tensor}}, cname("squeeze1d"), - {{name="CudaTensor", default=true, returned=true, + {{name=Tensor, default=true, returned=true, postcall= function(arg) local txt = {} @@ -533,7 +566,7 @@ wrap("squeeze", return table.concat(txt, '\n') end}, - {name="CudaTensor", + {name=Tensor, precall= function(arg) return string.format('{int hasdims = arg%d->nDimension > 1;', arg.i) diff --git a/lib/THC/THCTensorMath.cu b/lib/THC/THCTensorMath.cu index 3fcfe566..4f2d8c06 100644 
--- a/lib/THC/THCTensorMath.cu +++ b/lib/THC/THCTensorMath.cu @@ -27,6 +27,30 @@ void THCudaTensor_zero(THCudaTensor *self_) THCudaTensor_freeCopyTo(self, self_); } +void THCudaTensor_zeros(THCudaTensor *r_, THLongStorage *size) +{ + THCudaTensor_resize(r_, size, NULL); + THCudaTensor_zero(r_); +} + +void THCudaTensor_ones(THCudaTensor *r_, THLongStorage *size) +{ + THCudaTensor_resize(r_, size, NULL); + THCudaTensor_fill(r_, 1); +} + +void THCudaTensor_reshape(THCudaTensor *r_, THCudaTensor *t, THLongStorage *size) +{ + THCudaTensor_resize(r_, size, NULL); + THCudaTensor_copy(r_, t); +} + +long THCudaTensor_numel(THCudaTensor *t) +{ + return THCudaTensor_nElement(t); +} + + struct addvalue_functor { const float value; @@ -302,6 +326,17 @@ float THCudaTensor_sumall(THCudaTensor *self) return result; } +float THCudaTensor_prodall(THCudaTensor *self) +{ + self = THCudaTensor_newContiguous(self); + thrust::device_ptr<float> self_data(THCudaTensor_data(self)); + + float result = thrust::reduce(self_data, self_data+THCudaTensor_nElement(self), (float)(1), thrust::multiplies<float>()); + + THCudaTensor_free(self); + return result; +} + struct dim4 { @@ -511,6 +546,11 @@ void THCudaTensor_sum(THCudaTensor *self, THCudaTensor *src, long dimension) return THCudaTensor_reduceDim(self, src, dimension, 0.0f, thrust::plus<float>()); } +void THCudaTensor_prod(THCudaTensor *self, THCudaTensor *src, long dimension) +{ + return THCudaTensor_reduceDim(self, src, dimension, 1.0f, thrust::multiplies<float>()); +} + /* a set of reduction kernels that take in Binary ops on thrust pairs (of value, index) These are useful when you not only have to do a reduction, but you might have to preserve the location of contention (for example min/max operations) @@ -563,7 +603,7 @@ __host__ void THCudaTensor_transformReduceOuterDimIndex(THCudaTensor *tgt1, THCu } const unsigned nThreadPerBlock = 256; - unsigned nBlockPerColumn = DIVUP(size[0], nThreadPerBlock); + unsigned nBlockPerColumn = DIVUP(size[0], nThreadPerBlock); 
dim3 threads(nThreadPerBlock); unsigned maxGridDim = 1024; // anything < 64k is fine. The choice has no impact on performance. dim3 grid(min(maxGridDim, nBlockPerColumn), min(maxGridDim, size[1]), min(maxGridDim, size[2])); @@ -668,7 +708,7 @@ __host__ void THCudaTensor_transformReduceInnermostDimIndex( } dim3 threads(32, 16); - unsigned nBlockPerRow = DIVUP(size[1], threads.y); + unsigned nBlockPerRow = DIVUP(size[1], threads.y); unsigned maxGridDim = 1024; // anything < 64k is fine. The choice has no impact on performance. dim3 grid(min(maxGridDim, size[2]), min(maxGridDim, nBlockPerRow), min(maxGridDim, size[3])); diff --git a/lib/THC/THCTensorMath.h b/lib/THC/THCTensorMath.h index 8e446cf1..4637dcef 100644 --- a/lib/THC/THCTensorMath.h +++ b/lib/THC/THCTensorMath.h @@ -6,6 +6,11 @@ THC_API void THCudaTensor_fill(THCudaTensor *self, float value); THC_API void THCudaTensor_zero(THCudaTensor *self); +THC_API void THCudaTensor_zeros(THCudaTensor *r_, THLongStorage *size); +THC_API void THCudaTensor_ones(THCudaTensor *r_, THLongStorage *size); +THC_API void THCudaTensor_reshape(THCudaTensor *r_, THCudaTensor *t, THLongStorage *size); +THC_API long THCudaTensor_numel(THCudaTensor *t); + THC_API void THCudaTensor_add(THCudaTensor *self, THCudaTensor *src, float value); THC_API void THCudaTensor_mul(THCudaTensor *self, THCudaTensor *src, float value); THC_API void THCudaTensor_div(THCudaTensor *self, THCudaTensor *src, float value); @@ -23,9 +28,11 @@ THC_API float THCudaTensor_dot(THCudaTensor *self, THCudaTensor *src); THC_API float THCudaTensor_minall(THCudaTensor *self); THC_API float THCudaTensor_maxall(THCudaTensor *self); THC_API float THCudaTensor_sumall(THCudaTensor *self); +THC_API float THCudaTensor_prodall(THCudaTensor *self); THC_API void THCudaTensor_min(THCudaTensor *values, THCudaTensor *indices, THCudaTensor *src, long dim); THC_API void THCudaTensor_max(THCudaTensor *values, THCudaTensor *indices, THCudaTensor *src, long dim); THC_API void 
THCudaTensor_sum(THCudaTensor *self, THCudaTensor *src, long dim); +THC_API void THCudaTensor_prod(THCudaTensor *self, THCudaTensor *src, long dim); THC_API void THCudaTensor_addmv(THCudaTensor *self, float beta, THCudaTensor *t, float alpha, THCudaTensor *mat, THCudaTensor *vec); THC_API void THCudaTensor_addmm(THCudaTensor *self, float beta, THCudaTensor *t, float alpha, THCudaTensor *mat1, THCudaTensor *mat2); diff --git a/test/test.lua b/test/test.lua index 34e0f0d6..37b0bdb8 100644 --- a/test/test.lua +++ b/test/test.lua @@ -191,6 +191,49 @@ function test.largeNoncontiguous() compareFloatAndCuda(x, f) end +function test.zero() + local sz1 = math.floor(torch.uniform(minsize,maxsize)) + local sz2 = math.floor(torch.uniform(minsize,maxsize)) + local x = torch.FloatTensor():rand(sz1, sz2) + compareFloatAndCudaTensorArgs(x, 'zero') +end + +function test.fill() + local sz1 = math.floor(torch.uniform(minsize,maxsize)) + local sz2 = math.floor(torch.uniform(minsize,maxsize)) + local x = torch.FloatTensor():rand(sz1, sz2) + local v = torch.uniform() + compareFloatAndCudaTensorArgs(x, 'fill', v) +end + +function test.reshape() + local sz1 = math.floor(torch.uniform(minsize,maxsize))*2 + local sz2 = math.floor(torch.uniform(minsize,maxsize)) + local x = torch.FloatTensor():rand(sz1, sz2) + compareFloatAndCudaTensorArgs(x, 'reshape', sz1/2, sz2*2) +end + +function test.zeros() + local sz1 = math.floor(torch.uniform(minsize,maxsize)) + local sz2 = math.floor(torch.uniform(minsize,maxsize)) + local t = torch.getdefaulttensortype() + torch.setdefaulttensortype('torch.CudaTensor') + local x = torch.zeros(sz1, sz2) + assert(x:sum() == 0) + torch.setdefaulttensortype(t) +end + +function test.ones() + local sz1 = math.floor(torch.uniform(minsize,maxsize)) + local sz2 = math.floor(torch.uniform(minsize,maxsize)) + local t = torch.getdefaulttensortype() + torch.setdefaulttensortype('torch.CudaTensor') + local x = torch.ones(sz1, sz2) + assert(x:sum() == x:nElement()) + 
torch.setdefaulttensortype(t) +end + + function test.add() local sz1 = math.floor(torch.uniform(minsize,maxsize)) local sz2 = math.floor(torch.uniform(minsize,maxsize)) @@ -303,6 +346,8 @@ function test.min() end function test.sum() + local minsize = 10 + local maxsize = 20 local sz1 = math.floor(torch.uniform(minsize,maxsize)) local sz2 = math.floor(torch.uniform(minsize,maxsize)) local x = torch.FloatTensor():rand(sz1, sz2) @@ -321,6 +366,8 @@ function test.cumsum() end function test.prod() + local minsize = 10 + local maxsize = 20 local sz1 = math.floor(torch.uniform(minsize,maxsize)) local sz2 = math.floor(torch.uniform(minsize,maxsize)) local x = torch.FloatTensor():rand(sz1, sz2)