U, sigma , V = ti.svd(A, ti.f32) # 矩阵的奇异值分解 # sigma 是对角矩阵
ti.sin(A)、ti.cos(A) ... (element-wise) # 对矩阵中的每个元素分别进行 sin / cos 计算 # 所有的标量运算符均可以逐元素地作用在矩阵上
8. 并行 for 循环
Taichi 中的 for 循环有两种形式:
Range-for loops :
类似于 python 中的 for 循环;
只有在 kernel 的最外层作用域中使用时才会被自动并行化;
循环内并行执行的代码块顺序是不确定的;
循环的范围可以嵌套。
Struct-for loops :迭代(稀疏)张量元素。
8.1 Range-for loops
在 Taichi 的 kernel 中最外层作用域的 For 循环被自动并行化。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
# Demonstrates a range-for loop: only the outermost loop of a kernel is
# parallelized; loops nested inside it run serially per iteration.
# Reads and writes the fields `x` and `y`, which are declared elsewhere.
@ti.kernel
def fill():
    for i in range(10):  # Parallelized over 0 <= i < 10, in no fixed order
        x[i] += i
        s = 0
        for j in range(5):  # Runs serially inside each parallel iteration of i
            s += j
        y[i] = s
# Demonstrates a multi-dimensional range-for loop built with ti.ndrange.
# Writes the field `x`, which is declared elsewhere.
@ti.kernel
def fill_3d():
    # Parallelized for all 3 <= i < 8, 1 <= j < 6, 0 <= k < 9.
    # ti.ndrange takes either an upper bound or a (lower, upper) pair per axis.
    for i, j, k in ti.ndrange((3, 8), (1, 6), 9):
        x[i, j, k] = i + j + k
NOTE :只有最外层范围的循环可以并行化;但循环若不是在最外层(例如最外层还有 if 语句),则无法自动并行。
1 2 3 4 5 6 7 8 9 10
# The for loop sits at the outermost scope of the kernel, so it is
# parallelized automatically.
@ti.kernel
def foo():
    for i in range(10):  # Parallelized :-)
        ...
# The for loop is nested inside an `if`, so it is no longer at the outermost
# scope of the kernel and is executed serially.
@ti.kernel
def bar(k: ti.i32):
    if k > 42:
        for i in range(10):  # Serial :-(
            ...
8.2 Struct-for loops
1 2 3 4 5 6 7 8 9 10 11 12 13
import taichi as ti
ti.init(arch=ti.gpu)
n = 320
pixels = ti.var(dt=ti.f32, shape=(n * 2, n))


# Struct-for loop: iterates over every element of `pixels`, yielding its
# indices (i, j), and writes a value derived from the indices and time `t`.
@ti.kernel
def paint(t: ti.f32):
    for i, j in pixels:  # Parallelized over all pixels
        pixels[i, j] = i * 0.001 + j * 0.002 + t
a = ti.var(dt=ti.f32, shape=(42, 63))  # A tensor of 42x63 scalars
b = ti.Vector(3, dt=ti.f32, shape=4)  # A tensor of 4x 3D vectors
C = ti.Matrix(2, 2, dt=ti.f32, shape=(3, 5))  # A tensor of 3x5 2x2 matrices
@ti.data_oriented
class SolarSystem:
    """Particle simulation of bodies orbiting a central point.

    Holds per-particle position (`x`) and velocity (`v`) fields plus a single
    `center` field; Taichi kernels defined as methods operate on these fields.
    """

    def __init__(self, n, dt):
        """Initialize the solar system simulator.

        Args:
            n: Number of particles.
            dt: Time step used by `integrate`.
        """
        self.n = n
        self.dt = dt
        self.x = ti.Vector.field(2, dtype=ti.f32, shape=n)
        self.v = ti.Vector.field(2, dtype=ti.f32, shape=n)
        self.center = ti.Vector.field(2, dtype=ti.f32, shape=())

    # Create a random vector inside a circle of the given radius.
    @staticmethod
    @ti.func
    def random_vector(radius):
        theta = ti.random() * 2 * math.pi
        r = ti.random() * radius
        return r * ti.Vector([ti.cos(theta), ti.sin(theta)])

    # (Re)initialize particle positions and velocities.
    @ti.kernel
    def initialize_particles(self):
        for i in range(self.n):
            offset = self.random_vector(0.5)
            self.x[i] = self.center[None] + offset  # Offset from center
            self.v[i] = [-offset.y, offset.x]  # Perpendicular to offset
            self.v[i] += self.random_vector(0.02)  # Random velocity noise
            self.v[i] *= 1 / offset.norm()**1.5  # Kepler's third law

    # Compute the gravitational acceleration at `pos`: it points toward the
    # center and its magnitude falls off with the square of the distance
    # (offset / |offset|**3 has length 1 / |offset|**2).
    # `integrate` calls this helper, so it must be defined on the class.
    @ti.func
    def gravity(self, pos):
        offset = -(pos - self.center[None])
        return offset / offset.norm()**3

    # Semi-implicit Euler time integration: update the velocity first, then
    # advance the position with the updated velocity.
    @ti.kernel
    def integrate(self):
        for i in range(self.n):
            self.v[i] += self.dt * self.gravity(self.x[i])
            self.x[i] += self.dt * self.v[i]

    def render(self, gui):
        """Render the scene on `gui`: the central body, then every particle."""
        gui.circle([0.5, 0.5], radius=10, color=0xffaa88)
        # Draw this instance's particles rather than those of a global object.
        gui.circles(self.x.to_numpy(), radius=3, color=0xffffff)
solar = SolarSystem(8, 0.0001)
solar.center[None] = [0.5, 0.5]
solar.initialize_particles()

gui = ti.GUI("Solar System", background_color=0x0071a)
while gui.running:
    # Reinitialize the particles when the space bar is pressed.
    if gui.get_event() and gui.is_pressed(gui.SPACE):
        solar.initialize_particles()

    # Advance the simulation by several small time steps per rendered frame.
    for _ in range(10):
        solar.integrate()

    solar.render(gui)
    gui.show()
14. Templates
14.1 template 元编程
1 2 3 4
# Template arguments let one kernel definition work with any tensors passed
# as `x` and `y`: each element of `x` plus the scalar `c` is stored into the
# same index of `y`.
@ti.kernel
def offset(x: ti.template(), y: ti.template(), c: ti.f32):
    for i in x:
        y[i] = x[i] + c
# A template argument is resolved at compile time, so every distinct value
# passed for `i` instantiates a separate kernel.
@ti.kernel
def hello(i: ti.template()):
    print(i)


for i in range(100):
    hello(i)  # 100 different kernels will be created


# A plain ti.i32 argument is a runtime value, so one compiled kernel serves
# every call.
@ti.kernel
def world(i: ti.i32):
    print(i)


for i in range(100):
    world(i)  # The only instance will be reused
复制不同阶的 Tensor :
1 2 3 4
# Copy tensor `y` into tensor `x`. ti.grouped packs all loop indices into a
# single vector `I`, so the same kernel works regardless of the tensor's
# number of dimensions.
@ti.kernel
def copy(x: ti.template(), y: ti.template()):
    for I in ti.grouped(y):
        x[I] = y[I]
NOTE :
其中使用 ti.grouped(y) 对张量 y 中的所有元素进行了一个打包操作
1 2 3 4 5
# Store the sum of the first two index components of each element of `x`
# into the same position of `y`, using grouped indices.
@ti.kernel
def array_op(x: ti.template(), y: ti.template()):
    for I in ti.grouped(x):
        # I is a vector of size x.dim() and data type i32
        y[I] = I[0] + I[1]
如果 x 是二维张量,代码为:
1 2 3 4
# Two-dimensional form of the grouped-index version: the indices are
# unpacked explicitly as (i, j).
@ti.kernel
def array_op(x: ti.template(), y: ti.template()):
    for i, j in x:
        y[i, j] = i + j
14.2 template 的反射
可在编译期获得 Tensor 大小的反射:
编译时可以获得 template 的大小
1 2 3 4 5 6 7 8 9 10 11
import taichi as ti
tensor = ti.var(ti.f32, shape=(4, 8, 16, 32, 64))


# Print the number of dimensions of `x`, then the extent of each dimension.
# The loop is wrapped in ti.static, so it is unrolled at compile time.
@ti.kernel
def print_tensor_size(x: ti.template()):
    print(x.dim())
    for i in ti.static(range(x.dim())):
        print(x.shape()[i])


print_tensor_size(tensor)
14.3 编译期的分支语句
使用 ti.static(value) 让分支语句在编译期执行:
1 2 3 4 5 6
enable_projection = True


# The branch condition is wrapped in ti.static, so it is evaluated at compile
# time. Writes the field `x`, which is declared elsewhere.
@ti.kernel
def static():
    if ti.static(enable_projection):  # No runtime overhead
        x[0] = 1
14.4 编译期的循环展开
使用 ti.static(range(...)) 进行循环展开:
1 2 3 4 5 6 7 8 9 10 11 12 13
import taichi as ti
ti.init()
x = ti.Vector(3, dt=ti.i32, shape=16)


# Set every 3D vector in `x` to [0, 1, 2] and print it. The inner loop over
# the vector components is wrapped in ti.static so it is unrolled at compile
# time.
@ti.kernel
def fill():
    for i in x:
        for j in ti.static(range(3)):
            x[i][j] = j
        print(x[i])