Line | Branch | Exec | Source |
---|---|---|---|
1 | #include <cstdlib> | ||
2 | #include <iostream> | ||
3 | |||
4 | #include "dtypes.h" | ||
5 | |||
6 | #include "utilities/allocator.h" | ||
7 | #include "utilities/metadata.h" | ||
8 | |||
// When non-zero, the non-mimalloc code path falls back to plain
// std::malloc / std::free instead of the manual aligned allocation.
#define DISABLE_ALIGNED_ALLOC 0

// Alignment (in bytes) requested by the manual aligned allocation path.
#define ALIGNMENT 32

// USE_MIMALLOC is switched off only when WITH_VALGRIND is defined to a
// non-zero value; in every other configuration it is on.
#if defined(WITH_VALGRIND) && WITH_VALGRIND
#    define USE_MIMALLOC 0
#else
#    define USE_MIMALLOC 1
#endif
17 | |||
18 | #if USE_MIMALLOC | ||
19 | # include <mimalloc.h> | ||
20 | #endif | ||
21 | |||
22 | namespace lython { | ||
23 | namespace meta { | ||
// Accessor for a process-wide flag, initially false.
// Returns a mutable reference so callers can both read and set the flag.
bool& is_type_registry_available() {
    static bool registry_available = false;
    return registry_available;
}
28 | |||
29 | } // namespace meta | ||
30 | namespace device { | ||
31 | |||
32 | void* CPU::malloc(std::size_t n) { | ||
33 | #if USE_MIMALLOC | ||
34 | 114852 | return std::malloc(n); | |
35 | |||
36 | // return mi_malloc(n); | ||
37 | // return mi_malloc_aligned(n, ALIGNMENT); | ||
38 | #elif DISABLE_ALIGNED_ALLOC | ||
39 | return std::malloc(n); | ||
40 | #else | ||
41 | // TODO: seems 64bit alignment might be better (this is what tensorflow is using) | ||
42 | // but I have not found an official document stating so | ||
43 | static std::size_t alignment = ALIGNMENT; | ||
44 | |||
45 | // 16-byte aligned. | ||
46 | void* original = std::malloc(n + alignment); | ||
47 | |||
48 | if (original == nullptr) | ||
49 | return nullptr; | ||
50 | |||
51 | // alignment is a power of 2 (16, 32, 64) | ||
52 | // a = 0001 0000 = 16 | ||
53 | // a - 1 = 0000 1111 | ||
54 | // ~ (a - 1) = 1111 0000 | ||
55 | // b & ~ (a - 1) = Keep the top most ones 0 out the rest (i.e) get the closest power of two | ||
56 | std::size_t cp2 = reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment - 1)); | ||
57 | |||
58 | // add alignment to it to get a memory address that is inside our allocation & aligned | ||
59 | void* aligned = reinterpret_cast<void*>(cp2 + alignment); | ||
60 | |||
61 | // store original pointer before the aligned address for deletion | ||
62 | *(reinterpret_cast<void**>(aligned) - 1) = original; | ||
63 | |||
64 | return aligned; | ||
65 | #endif | ||
66 | } | ||
67 | |||
68 | bool CPU::free(void* ptr, std::size_t) { | ||
69 | #if USE_MIMALLOC | ||
70 | 116169 | std::free(ptr); | |
71 | |||
72 | // mi_free(ptr); | ||
73 | // mi_free_aligned(ptr, ALIGNMENT); | ||
74 | 116169 | return true; | |
75 | #elif DISABLE_ALIGNED_ALLOC | ||
76 | std::free(ptr); | ||
77 | return true; | ||
78 | #else | ||
79 | if (ptr) { | ||
80 | std::free(*(reinterpret_cast<void**>(ptr) - 1)); | ||
81 | } | ||
82 | return true; | ||
83 | #endif | ||
84 | } | ||
85 | |||
86 | } // namespace device | ||
87 | |||
88 | void show_alloc_stats() { | ||
89 |
1/1✓ Branch 1 taken 14 times.
|
14 | metadata_init_names(); |
90 | |||
91 |
1/1✓ Branch 1 taken 14 times.
|
14 | auto const& stat = meta::stats(); |
92 |
1/1✓ Branch 1 taken 14 times.
|
14 | std::unordered_map<int, std::string> const& names = meta::typenames(); |
93 | |||
94 |
1/1✓ Branch 2 taken 14 times.
|
14 | auto line = String(4 + 40 + 10 + 10 + 10 + 10 + 10 + 10 + 7 + 1, '-'); |
95 | |||
96 |
2/2✓ Branch 1 taken 14 times.
✓ Branch 4 taken 14 times.
|
14 | std::cout << line << '\n'; |
97 |
1/1✓ Branch 1 taken 14 times.
|
28 | std::cout << fmt::format("{:>4} {:>41} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10}\n", |
98 | "id", | ||
99 | "name", | ||
100 | "alloc", | ||
101 | "dealloc", | ||
102 | "remain", | ||
103 | "size", | ||
104 | "size_free", | ||
105 |
1/1✓ Branch 1 taken 14 times.
|
14 | "bytes"); |
106 | |||
107 |
2/2✓ Branch 1 taken 14 times.
✓ Branch 4 taken 14 times.
|
14 | std::cout << line << '\n'; |
108 | 14 | int total = 0; | |
109 | |||
110 |
2/2✓ Branch 1 taken 2887 times.
✓ Branch 2 taken 14 times.
|
2901 | for (size_t i = 0; i < stat.size(); ++i) { |
111 |
1/1✓ Branch 2 taken 2887 times.
|
2887 | std::string name = ""; |
112 | try { | ||
113 |
2/2✓ Branch 1 taken 2886 times.
✓ Branch 4 taken 2886 times.
|
2887 | name = names.at(int(i)); |
114 |
2/3✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✓ Branch 4 taken 1 times.
|
1 | } catch (std::out_of_range&) { name = ""; } |
115 | |||
116 | 2887 | auto init = stat[i].startup_count; | |
117 | 2887 | auto alloc = stat[i].allocated - init; | |
118 | 2887 | auto dealloc = stat[i].deallocated; | |
119 | 2887 | auto size = stat[i].size_alloc; | |
120 | 2887 | auto size_free = stat[i].size_free; | |
121 | 2887 | auto bytes = stat[i].bytes; | |
122 | |||
123 | 2887 | total += size * bytes; | |
124 | |||
125 |
2/2✓ Branch 0 taken 588 times.
✓ Branch 1 taken 2299 times.
|
2887 | if (alloc != 0) { |
126 |
1/1✓ Branch 1 taken 588 times.
|
1176 | std::cout << fmt::format("{:>4} {:>41} {:>10} {:>10} {:>10} {:>10} {:>10} {:>10}\n", |
127 | i, | ||
128 |
1/1✓ Branch 2 taken 588 times.
|
1176 | String(name.c_str()), |
129 | alloc, | ||
130 | dealloc, | ||
131 | 1176 | alloc - dealloc, | |
132 | size, | ||
133 | size_free, | ||
134 |
1/1✓ Branch 1 taken 588 times.
|
588 | bytes); |
135 | } | ||
136 | 2887 | } | |
137 |
3/3✓ Branch 1 taken 14 times.
✓ Branch 4 taken 14 times.
✓ Branch 7 taken 14 times.
|
14 | std::cout << "Total: " << total << std::endl; |
138 |
2/2✓ Branch 1 taken 14 times.
✓ Branch 4 taken 14 times.
|
14 | std::cout << line << '\n'; |
139 | |||
140 | std::cout | ||
141 | << "NB: Notice that not everything was `freed`, this is because the accounting happens " | ||
142 | "before the static variables gets released.\n" | ||
143 | "which means it does not necessarily means there is a memory leak.\n" | ||
144 | "use valgrind to make sure everything is released properly.\n" | ||
145 | "\n" | ||
146 | "* Pair[String, NativeBinaryOp]: Native operator, allocated once using static\n" | ||
147 | "* Pair[StringView, size_t]: From the string database, allocated once using static\n" | ||
148 | "* Constant: builtin constant created once using static\n" | ||
149 |
1/1✓ Branch 1 taken 14 times.
|
14 | "\n----\n"; |
150 | 14 | } | |
151 | |||
152 | } // namespace lython | ||
153 |