OOD 实现

实现

首先给出这次的需求:创建两块区域,分别为生成 cube 的区域和销毁 cube 的区域。每个 TickTime 都生成一定数量的 cube,这些 cube 一边旋转一边向销毁区域前进,到达后被回收。首先直接来看一看 OOD 版本的实现:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// ReturnToPool.cs

using UnityEngine;
using UnityEngine.Pool;

namespace JobsTutorials.Lesson1.Scripts.Common
{
/// <summary>
/// Lets a pooled GameObject hand itself back to the pool it belongs to.
/// </summary>
public class ReturnToPool : MonoBehaviour
{
    // Pool this GameObject is released into; null until something assigns it.
    public ObjectPool<GameObject> pool = null;

    /// <summary>
    /// Releases this GameObject into <see cref="pool"/>. Does nothing when no pool is assigned.
    /// </summary>
    public void OnDisappear()
    {
        if (pool == null)
        {
            return;
        }

        pool.Release(gameObject);
    }
}
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// AutoRotateAndMove.cs

using JobsTutorials.Lesson1.Scripts.Common;
using Unity.Profiling;
using UnityEngine;

namespace JobsTutorials.Lesson1.Scripts.OOD
{
/// <summary>
/// Spins the cube around its up axis and walks it towards <see cref="targetPos"/>;
/// once it is closer than <see cref="Epsilon"/> it asks its ReturnToPool component to release it.
/// </summary>
[RequireComponent(typeof(ReturnToPool))]
public class AutoRotateAndMove : MonoBehaviour
{
    // Distance below which the cube counts as having arrived.
    private const float Epsilon = 0.05f;

    // Degrees per second around Vector3.up.
    public float rotateSpeed = 180.0f;

    // World units per second towards targetPos.
    public float moveSpeed = 5.0f;

    // Destination the cube walks to.
    public Vector3 targetPos;

    private static readonly ProfilerMarker profilerMarker = new ProfilerMarker("CubeMarch");

    // Looked up once instead of on every frame spent near the target.
    private ReturnToPool _returnToPool;

    private void Awake()
    {
        _returnToPool = GetComponent<ReturnToPool>();
    }

    private void Update()
    {
        using (profilerMarker.Auto())
        {
            transform.Rotate(Vector3.up, rotateSpeed * Time.deltaTime);

            var toTarget = targetPos - transform.position;

            // Squared comparison avoids the square root that magnitude would compute.
            if (toTarget.sqrMagnitude >= Epsilon * Epsilon)
            {
                // MoveTowards clamps the step so the cube cannot jump past the target.
                // A fixed-length step could overshoot on long frames and bounce around
                // the target without ever getting inside Epsilon.
                transform.position = Vector3.MoveTowards(
                    transform.position,
                    targetPos,
                    moveSpeed * Time.deltaTime);
                return;
            }

            if (_returnToPool)
            {
                _returnToPool.OnDisappear();
            }
        }
    }
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// CubeGenerator.cs

using JobsTutorials.Lesson1.Scripts.Common;
using UnityEngine;
using UnityEngine.Pool;
using Random = UnityEngine.Random;

namespace JobsTutorials.Lesson1.Scripts.OOD
{
/// <summary>
/// Every <see cref="tickTime"/> seconds takes up to <see cref="generationNumPerTickTime"/> cubes
/// from an object pool, places each at a random point inside this object's BoxCollider and gives
/// it a random destination inside <see cref="targetArea"/>.
/// </summary>
[RequireComponent(typeof(BoxCollider))]
public class CubeGenerator : MonoBehaviour
{
    // Prefab instantiated for every pooled cube.
    public GameObject cubeArchetype = null;

    // Object whose BoxCollider size bounds the random destinations.
    public GameObject targetArea = null;

    [Range(1, 10000)] public int generationTotalNum = 2000;
    [Range(1, 60)] public int generationNumPerTickTime = 10;
    [Range(0.1f, 1.0f)] public float tickTime = 0.2f;
    [HideInInspector] public Vector3 generatorAreaSize;
    [HideInInspector] public Vector3 targetAreaSize;

    // Forwarded to the ObjectPool constructor as its collection-check flag
    // (per Unity docs: releasing an item that is already in the pool then throws).
    public bool collectionChecks = true;
    private ObjectPool<GameObject> _pool = null;
    private float _timer = 0.0f;

    private void Start()
    {
        _pool = new ObjectPool<GameObject>(
            CreatePooledItem,
            OnTakeFromPool,
            OnReturnedToPool,
            OnDestroyPoolObject,
            collectionChecks,
            10,
            generationTotalNum);

        generatorAreaSize = GetComponent<BoxCollider>().size;

        // A target without a BoxCollider leaves targetAreaSize at zero instead of throwing.
        if (targetArea != null && targetArea.TryGetComponent(out BoxCollider targetCollider))
        {
            targetAreaSize = targetCollider.size;
        }
    }

    private void Update()
    {
        if (_timer >= tickTime)
        {
            GenerateCubes();
            _timer = 0.0f;
        }

        _timer += Time.deltaTime;
    }

    private void OnDestroy()
    {
        // _pool is still null if Start never ran (e.g. the component was never enabled).
        _pool?.Dispose();
    }

    /// <summary>
    /// Takes up to generationNumPerTickTime cubes from the pool and positions them.
    /// </summary>
    private void GenerateCubes()
    {
        if (cubeArchetype == null || _pool == null)
        {
            return;
        }

        for (int i = 0; i < generationNumPerTickTime; i++)
        {
            // NOTE(review): CountAll is documented as active + inactive items, so once
            // generationTotalNum cubes have been created this stops spawning for good,
            // even after cubes are released. Confirm that is intended.
            if (_pool.CountAll >= generationTotalNum)
            {
                _timer = 0.0f;
                return;
            }

            var cube = _pool.Get();
            if (!cube)
            {
                continue;
            }

            cube.transform.position = GetRandomPos(transform.position, generatorAreaSize);

            if (targetArea != null && cube.TryGetComponent(out AutoRotateAndMove mover))
            {
                mover.targetPos = GetRandomPos(targetArea.transform.position, targetAreaSize);
            }
        }
    }

    /// <summary>
    /// Returns originPos offset by a uniform random amount within half of areaSize on X and Z;
    /// the Y offset is always 0.
    /// </summary>
    private Vector3 GetRandomPos(Vector3 originPos, Vector3 areaSize)
    {
        var halfX = areaSize.x * 0.5f;
        var halfZ = areaSize.z * 0.5f;
        return originPos + new Vector3(Random.Range(-halfX, halfX), 0, Random.Range(-halfZ, halfZ));
    }

    /// <summary>Pool callback: instantiates a new cube as a child of this generator.</summary>
    private GameObject CreatePooledItem()
    {
        var cube = Instantiate(cubeArchetype, transform);

        // The owning pool never changes, so wire it once at creation rather than on every spawn.
        if (cube.TryGetComponent(out ReturnToPool returnToPool))
        {
            returnToPool.pool = _pool;
        }

        return cube;
    }

    /// <summary>Pool callback: deactivates a cube that was released.</summary>
    private void OnReturnedToPool(GameObject gameObj)
    {
        gameObj.SetActive(false);
    }

    /// <summary>Pool callback: activates a cube that was taken out.</summary>
    private void OnTakeFromPool(GameObject gameObj)
    {
        gameObj.SetActive(true);
    }

    /// <summary>Pool callback: destroys a cube the pool no longer keeps.</summary>
    private void OnDestroyPoolObject(GameObject gameObj)
    {
        Destroy(gameObj);
    }
}
}

此处将 cube 的运动都放在了 AutoRotateAndMove 当中,CubeGenerator 负责使用 unity 自带的对象池管理对象的分配,生成点和销毁点的随机生成。其实这一块逻辑也没啥好讲的,直接开始改造。

Profiler 性能分析

DOD 实现

要使用 job 改造我们的 OOD 代码,我们就先得思考 job 主要是负责的什么。在 lesson0 当中,job 负责了 cube 的运动,也就是 transform 的变换,此处我们也是得将 cube 的旋转和移动都搬入 job 当中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// AutoRotateAndMoveJob.cs

/// <summary>
/// Transform job that moves each transform towards its entry in
/// <see cref="randomTargetPosArray"/> and adds a yaw increment to its local rotation.
/// </summary>
public struct AutoRotateAndMoveJob : IJobParallelForTransform
{
    // Time step applied to both movement and rotation; set by whoever schedules the job.
    public float deltaTime;

    // Degrees added to the local Y euler angle per unit of deltaTime.
    public float rotateSpeed;

    // Distance travelled per unit of deltaTime.
    public float moveSpeed;

    // Destination for the transform at the same index.
    public NativeArray<Vector3> randomTargetPosArray;

    /// <summary>Advances the transform at <paramref name="index"/> by one step.</summary>
    public void Execute(int index, TransformAccess transform)
    {
        // Translation: one fixed-length step along the direction to the target.
        Vector3 toTarget = randomTargetPosArray[index] - transform.position;
        transform.position += toTarget.normalized * moveSpeed * deltaTime;

        // Rotation: read the local euler angles, bump Y, write the rotation back.
        Vector3 euler = transform.localRotation.eulerAngles;
        transform.localRotation = Quaternion.Euler(euler.x, euler.y + rotateSpeed * deltaTime, euler.z);
    }
}

此处暂时不用管为什么没有使用 [BurstCompile] 编译,后面会一步一步优化。由于一个 job 运行时需要访问所有物体的 transform,因此我们需要在创建 job 时就把每个 cube 的目标位置一并传入,也就是 randomTargetPosArray。另外,TransformAccess 并没有提供类似 Transform.Rotate 那样绕任意轴旋转的方法,因此旋转需要我们自己处理一下:这里通过修改 localRotation 的欧拉角来实现。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
// CubeGenerator.cs

using JobsTutorials.Lesson1.Scripts.Common;
using Unity.Collections;
using Unity.Profiling;
using UnityEngine;
using UnityEngine.Jobs;
using UnityEngine.Pool;
using Random = UnityEngine.Random;

namespace JobsTutorials.Lesson1.Scripts.DOD
{
/// <summary>
/// Job-driven cube spawner. Start pre-creates <see cref="generationTotalNum"/> cubes, records a
/// random spawn and target position for each, and releases them all back into the pool.
/// Update runs <c>AutoRotateAndMoveJob</c> over every cube transform each frame and, every
/// <see cref="tickTime"/> seconds, takes up to <see cref="generationNumPerTickTime"/> cubes out of the pool.
/// </summary>
[RequireComponent(typeof(BoxCollider))]
public class CubeGenerator : MonoBehaviour
{
    // Prefab instantiated for every pooled cube.
    public GameObject cubeArchetype = null;

    // Object whose BoxCollider size bounds the random destinations.
    public GameObject targetArea = null;

    [Range(1, 10000)] public int generationTotalNum = 2000;
    [Range(1, 60)] public int generationNumPerTickTime = 10;
    [Range(0.1f, 1.0f)] public float tickTime = 0.2f;
    [HideInInspector] public Vector3 generatorAreaSize;
    [HideInInspector] public Vector3 targetAreaSize;
    public float rotateSpeed = 180.0f;
    public float moveSpeed = 5.0f;

    // Forwarded to the ObjectPool constructor as its collection-check flag
    // (per Unity docs: releasing an item that is already in the pool then throws).
    public bool collectionChecks = true;
    private ObjectPool<GameObject> _pool = null;
    private float _timer = 0.0f;

    // Native containers handed to the job; both are allocated in Start and disposed in OnDestroy.
    private TransformAccessArray _transformAccessArray;
    private NativeArray<Vector3> _randomTargetPosArray;
    private Transform[] _transforms;

    private static readonly ProfilerMarker profilerMarker = new ProfilerMarker("CubesMarchWithJob");

    private void Start()
    {
        // Report a misconfigured scene with a readable message instead of a
        // NullReferenceException halfway through the setup below.
        if (cubeArchetype == null
            || targetArea == null
            || !targetArea.TryGetComponent(out BoxCollider targetCollider))
        {
            Debug.LogError("CubeGenerator needs a cubeArchetype and a targetArea with a BoxCollider.", this);
            enabled = false;
            return;
        }

        _pool = new ObjectPool<GameObject>(
            CreatePooledItem,
            OnTakeFromPool,
            OnReturnedToPool,
            OnDestroyPoolObject,
            collectionChecks,
            10,
            generationTotalNum);

        generatorAreaSize = GetComponent<BoxCollider>().size;
        targetAreaSize = targetCollider.size;

        // Build every cube up front and collect its Transform for the job.
        _randomTargetPosArray = new NativeArray<Vector3>(generationTotalNum, Allocator.Persistent);
        _transforms = new Transform[generationTotalNum];

        for (int i = 0; i < generationTotalNum; i++)
        {
            var cube = _pool.Get();
            var component = cube.AddComponent<AutoReturnToPool>();

            component.pool = _pool;
            var randomGenerationPos = GetRandomPos(transform.position, generatorAreaSize);
            cube.transform.position = randomGenerationPos;
            component.generationPos = randomGenerationPos;
            _transforms[i] = cube.transform;

            var randomTargetPos = GetRandomPos(targetArea.transform.position, targetAreaSize);
            _randomTargetPosArray[i] = randomTargetPos;
            component.targetPos = randomTargetPos;
        }

        _transformAccessArray = new TransformAccessArray(_transforms);

        // Put the freshly created cubes back into the pool.
        for (int i = generationTotalNum - 1; i >= 0; i--)
        {
            _pool.Release(_transforms[i].gameObject);
        }
    }

    private void Update()
    {
        // Nothing to schedule if Start bailed out before allocating the containers.
        if (!_transformAccessArray.isCreated || !_randomTargetPosArray.IsCreated)
        {
            return;
        }

        using (profilerMarker.Auto())
        {
            var autoRotateAndMoveJob = new AutoRotateAndMoveJob
            {
                deltaTime = Time.deltaTime,
                moveSpeed = moveSpeed,
                rotateSpeed = rotateSpeed,
                randomTargetPosArray = _randomTargetPosArray
            };

            // Completed right away, so the job's cost is included in this profiler marker.
            var autoRotateAndMoveJobHandle = autoRotateAndMoveJob.Schedule(_transformAccessArray);
            autoRotateAndMoveJobHandle.Complete();

            if (_timer >= tickTime)
            {
                GenerateCubes();
                _timer = 0.0f;
            }

            _timer += Time.deltaTime;
        }
    }

    private void OnDestroy()
    {
        // Each resource is guarded: none of them exists if Start never ran or bailed out early.
        if (_transformAccessArray.isCreated)
        {
            _transformAccessArray.Dispose();
        }

        if (_randomTargetPosArray.IsCreated)
        {
            _randomTargetPosArray.Dispose();
        }

        _pool?.Dispose();
    }

    /// <summary>
    /// Takes up to generationNumPerTickTime cubes out of the pool.
    /// </summary>
    private void GenerateCubes()
    {
        if (cubeArchetype == null || _pool == null)
        {
            return;
        }

        for (int i = 0; i < generationNumPerTickTime; i++)
        {
            // "Generating" here really means taking a pre-created cube out of the pool,
            // hence the check against CountActive.
            if (_pool.CountActive >= generationTotalNum)
            {
                _timer = 0.0f;
                return;
            }

            _pool.Get();
        }
    }

    /// <summary>
    /// Returns originPos offset by a uniform random amount within half of areaSize on X and Z;
    /// the Y offset is always 0.
    /// </summary>
    private Vector3 GetRandomPos(Vector3 originPos, Vector3 areaSize)
    {
        var halfX = areaSize.x * 0.5f;
        var halfZ = areaSize.z * 0.5f;
        return originPos + new Vector3(Random.Range(-halfX, halfX), 0, Random.Range(-halfZ, halfZ));
    }

    /// <summary>Pool callback: instantiates a new cube as a child of this generator.</summary>
    private GameObject CreatePooledItem()
    {
        return Instantiate(cubeArchetype, transform);
    }

    /// <summary>Pool callback: deactivates a cube that was released.</summary>
    private void OnReturnedToPool(GameObject gameObj)
    {
        gameObj.SetActive(false);
    }

    /// <summary>Pool callback: activates a cube that was taken out.</summary>
    private void OnTakeFromPool(GameObject gameObj)
    {
        gameObj.SetActive(true);
    }

    /// <summary>Pool callback: destroys a cube the pool no longer keeps.</summary>
    private void OnDestroyPoolObject(GameObject gameObj)
    {
        Destroy(gameObj);
    }
}
}

在上面 job 的时候就说过,我们创建 job 时需要有所有 cube 的 transform,这就成了局限性。导致我们在此处必须创建出:

  • 数量等同于 generationTotalNum 的 cube 对象
  • 为每一个对象都创建出其生成点和销毁点

在创建后又马上将其返回对象池,接着就像之前一样,每次从对象池中拿出 cube 来,因为此处我们对象池已经是生成满了,因此使用 CountActive 来模拟当前对象池中已生成物体数量。

Profiler 性能分析

此时我们可以发现——和传统 OOD 实现方式相比,改造成 job 之后变化貌似不是很大。此时我们就想起来前面的,[BurstCompile] 也不加,float3 也不用,因此接下来我们就根据不同的 job 优化方案看看各方案之间的性能对比。

优化方案

Optimize0

此版本只对 job 进行 Burst 编译

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/// <summary>
/// Burst-compiled variant of the move-and-rotate transform job: steps each transform towards its
/// entry in <see cref="randomTargetPosArray"/> and adds a yaw increment to its local rotation.
/// </summary>
[BurstCompile]
public struct AutoRotateAndMoveJobOptimize0 : IJobParallelForTransform
{
    // Time step applied to both movement and rotation; set by whoever schedules the job.
    public float deltaTime;

    // Degrees added to the local Y euler angle per unit of deltaTime.
    public float rotateSpeed;

    // Distance travelled per unit of deltaTime.
    public float moveSpeed;

    // Destination for the transform at the same index.
    public NativeArray<Vector3> randomTargetPosArray;

    /// <summary>Advances the transform at <paramref name="index"/> by one step.</summary>
    public void Execute(int index, TransformAccess transform)
    {
        // Translation: one fixed-length step along the direction to the target.
        Vector3 toTarget = randomTargetPosArray[index] - transform.position;
        transform.position += toTarget.normalized * moveSpeed * deltaTime;

        // Rotation: read the local euler angles, bump Y, write the rotation back.
        Vector3 euler = transform.localRotation.eulerAngles;
        transform.localRotation = Quaternion.Euler(euler.x, euler.y + rotateSpeed * deltaTime, euler.z);
    }
}

此处优化作用还是非常明显的:

  • BehaviourUpdate 从 2.38ms 优化到了 1.21ms
  • 生成 cube 的 Update 也从 1.64ms 优化到了 0.42ms

这个优化效果还是非常非常明显的。

Optimize1

此处我们将 NativeArray<Vector3> 的属性加上 [ReadOnly],来使多个工作线程对其访问更加高效

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/// <summary>
/// Burst-compiled move-and-rotate transform job whose target array is marked [ReadOnly]:
/// steps each transform towards its entry in <see cref="randomTargetPosArray"/> and adds a
/// yaw increment to its local rotation.
/// </summary>
[BurstCompile]
public struct AutoRotateAndMoveJobOptimize1 : IJobParallelForTransform
{
    // Time step applied to both movement and rotation; set by whoever schedules the job.
    public float deltaTime;

    // Degrees added to the local Y euler angle per unit of deltaTime.
    public float rotateSpeed;

    // Distance travelled per unit of deltaTime.
    public float moveSpeed;

    // Destination for the transform at the same index; the job only reads it.
    [ReadOnly] public NativeArray<Vector3> randomTargetPosArray;

    /// <summary>Advances the transform at <paramref name="index"/> by one step.</summary>
    public void Execute(int index, TransformAccess transform)
    {
        // Translation: one fixed-length step along the direction to the target.
        Vector3 toTarget = randomTargetPosArray[index] - transform.position;
        transform.position += toTarget.normalized * moveSpeed * deltaTime;

        // Rotation: read the local euler angles, bump Y, write the rotation back.
        Vector3 euler = transform.localRotation.eulerAngles;
        transform.localRotation = Quaternion.Euler(euler.x, euler.y + rotateSpeed * deltaTime, euler.z);
    }
}

可以看到此时的性能提升也并不是很大,在很多帧的表现和 Optimize0 都是大同小异的。

Optimize2

最后一种优化,我们将 NativeArray<Vector3> 更换为 NativeArray<float3>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/// <summary>
/// Burst-compiled move-and-rotate transform job that does its movement maths with
/// Unity.Mathematics float3: steps each transform towards its entry in
/// <see cref="randomTargetPosArray"/> and adds a yaw increment to its local rotation.
/// </summary>
[BurstCompile]
public struct AutoRotateAndMoveJobOptimize2 : IJobParallelForTransform
{
    // Time step applied to both movement and rotation; set by whoever schedules the job.
    public float deltaTime;

    // Degrees added to the local Y euler angle per unit of deltaTime.
    public float rotateSpeed;

    // Distance travelled per unit of deltaTime.
    public float moveSpeed;

    // Destination for the transform at the same index; the job only reads it.
    [ReadOnly] public NativeArray<float3> randomTargetPosArray;

    /// <summary>Advances the transform at <paramref name="index"/> by one step.</summary>
    public void Execute(int index, TransformAccess transform)
    {
        // normalizesafe returns float3.zero for a zero-length vector. math.normalize would
        // divide by zero there and write NaN into the position once a cube sits exactly on
        // its target. This matches Vector3.normalized, which the Vector3-based jobs rely on.
        var moveDir = math.normalizesafe(randomTargetPosArray[index] - (float3)transform.position);
        var deltaDistance = moveDir * moveSpeed * deltaTime;
        transform.position += new Vector3(deltaDistance.x, deltaDistance.y, deltaDistance.z);

        // Rotation: read the local euler angles, bump Y, write the rotation back.
        var localEulerAngles = transform.localRotation.eulerAngles;
        localEulerAngles.y += rotateSpeed * deltaTime;
        transform.localRotation = Quaternion.Euler(localEulerAngles);
    }
}

这个数据其实我有点意外,因为带来的提升并没有想象中的大,其看起来只是比 Optimize1 更稳定,一直稳定在图里面这个数据,不像 Optimize1 经常会跳到一个可能比 Optimize0 还稍大的数据。

由此得出,目前对性能影响最大的还是是否经过 Burst 编译。毕竟在排队调度中已经讨论过,合理的调度对系统的速度影响还是非常大的。