Understanding vectorisation and parallelisation of tensors

I've written this code, which carries on from the tensors tutorial on YouTube and uses code from the matrix multiplication tutorial:
from random import rand
from math import trunc, rsqrt
from sys.info import simdwidthof
from algorithm import vectorize, parallelize

# Element dtype used by every tensor function in this file.
alias type = DType.float32
# SIMD width that simdwidthof reports for `type`; handed to `vectorize` below.
alias simd_width: Int = simdwidthof[type]()


fn tensor_math(t: Tensor[type]) -> Tensor[type]:
    """Scalar reference version: apply `rsqrt` to each element of `t`.

    Args:
        t: Input tensor; it is only read.

    Returns:
        A new tensor with the same shape as `t` whose i-th flat element is
        `rsqrt(t[i])`.
    """
    # The result is built with t's shape, so both tensors hold the same
    # number of elements.
    let count = t.num_elements()
    var result = Tensor[type](t.shape())

    var idx = 0
    while idx < count:
        result[idx] = rsqrt(t[idx])
        idx += 1

    return result


fn vectorized_tensor_math(t: Tensor[type]) -> Tensor[type]:
    """Apply `rsqrt` element-wise using SIMD within a row and threads across rows.

    The tensor is processed as a (rows, columns) grid where `rows` is the
    first dimension and `columns` is the number of elements per first-dimension
    slice. For a rank-2 tensor this is exactly `shape()[0]` by `shape()[1]`;
    for other ranks the trailing dimensions are flattened, so the function
    accepts the same inputs as `tensor_math` instead of indexing `shape()[1]`
    unconditionally.

    Args:
        t: Input tensor; it is only read.

    Returns:
        A new tensor with the same shape as `t` holding `rsqrt` of each element.
    """
    var t_new = Tensor[type](t.shape())
    let total = t_new.num_elements()
    if total == 0:
        # Nothing to compute; this also keeps the division below away from a
        # zero row count.
        return t_new

    # Fall back to a single row when there is no first dimension to split on.
    var rows = 1
    if t_new.rank() > 0:
        rows = t_new.shape()[0]
    let columns = total // rows

    @parameter
    fn calc_row(row: Int):
        # Flat offset of this row's first element. Like the original
        # `row * columns + col` indexing, this assumes contiguous row-major
        # storage.
        let row_start = row * columns

        # The width parameter is named `width` so it does not shadow the
        # file-level `simd_width` alias.
        @parameter
        fn vecmath[width: Int](col: Int) -> None:
            let offset = row_start + col
            t_new.simd_store[width](
                offset,
                rsqrt[type, width](t.simd_load[width](offset)),
            )

        # Walk this row's columns in SIMD-sized steps.
        vectorize[simd_width, vecmath](columns)

    # One work item per row.
    parallelize[calc_row](rows)
    return t_new


fn main() raises:
    """Demo: build a random 3 x 30 tensor, run the vectorized rsqrt, print it."""
    let source = rand[type](3, 30)
    print(vectorized_tensor_math(source))
from random import rand
from math import trunc, rsqrt
from sys.info import simdwidthof
from algorithm import vectorize, parallelize

# Element dtype used by every tensor function in this file.
alias type = DType.float32
# SIMD width that simdwidthof reports for `type`; handed to `vectorize` below.
alias simd_width: Int = simdwidthof[type]()


fn tensor_math(t: Tensor[type]) -> Tensor[type]:
    """Scalar reference version: apply `rsqrt` to each element of `t`.

    Args:
        t: Input tensor; it is only read.

    Returns:
        A new tensor with the same shape as `t` whose i-th flat element is
        `rsqrt(t[i])`.
    """
    # The result is built with t's shape, so both tensors hold the same
    # number of elements.
    let count = t.num_elements()
    var result = Tensor[type](t.shape())

    var idx = 0
    while idx < count:
        result[idx] = rsqrt(t[idx])
        idx += 1

    return result


fn vectorized_tensor_math(t: Tensor[type]) -> Tensor[type]:
    """Apply `rsqrt` element-wise using SIMD within a row and threads across rows.

    The tensor is processed as a (rows, columns) grid where `rows` is the
    first dimension and `columns` is the number of elements per first-dimension
    slice. For a rank-2 tensor this is exactly `shape()[0]` by `shape()[1]`;
    for other ranks the trailing dimensions are flattened, so the function
    accepts the same inputs as `tensor_math` instead of indexing `shape()[1]`
    unconditionally.

    Args:
        t: Input tensor; it is only read.

    Returns:
        A new tensor with the same shape as `t` holding `rsqrt` of each element.
    """
    var t_new = Tensor[type](t.shape())
    let total = t_new.num_elements()
    if total == 0:
        # Nothing to compute; this also keeps the division below away from a
        # zero row count.
        return t_new

    # Fall back to a single row when there is no first dimension to split on.
    var rows = 1
    if t_new.rank() > 0:
        rows = t_new.shape()[0]
    let columns = total // rows

    @parameter
    fn calc_row(row: Int):
        # Flat offset of this row's first element. Like the original
        # `row * columns + col` indexing, this assumes contiguous row-major
        # storage.
        let row_start = row * columns

        # The width parameter is named `width` so it does not shadow the
        # file-level `simd_width` alias.
        @parameter
        fn vecmath[width: Int](col: Int) -> None:
            let offset = row_start + col
            t_new.simd_store[width](
                offset,
                rsqrt[type, width](t.simd_load[width](offset)),
            )

        # Walk this row's columns in SIMD-sized steps.
        vectorize[simd_width, vecmath](columns)

    # One work item per row.
    parallelize[calc_row](rows)
    return t_new


fn main() raises:
    """Demo: build a random 3 x 30 tensor, run the vectorized rsqrt, print it."""
    let source = rand[type](3, 30)
    print(vectorized_tensor_math(source))
Have I understood their use properly when using them with Tensors?
0 Replies
No replies yet. Be the first to reply to this message. Join
Want results from more Discord servers?
Add your server