#!/usr/bin/env python3

# Copyright 2017 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Mad Memory Muncher - Dynamically Organizing Non-Uniprocess Tester.

This program attempts to exercise low memory situations by munching memory
in a coordinated way across several processes.

Specifically, there is a single controlling process that keeps communication
channels open to subprocesses so that all processes can start and stop various
parts of the test at the same time. This tester also has various clever ways
to access memory.

The main modes are: munch (allocate memory), taste (re-read already
allocated memory), and chew (modify already allocated memory). Whenever
possible we try to put some sane values into memory so that any memory
compression will behave in a real-world-like way.

At the moment this program always makes sure that all of the child
sub-processes are set to have an OOM score of 1000 (easy to kill them) and
the parent has a default OOM score (unkillable on Chrome OS). At various
checkpoints in the test the parent looks for dead children and stops the test.

NOTES:
- The way this program works is subject to change depending on the needs
  of people stressing memory. Don't rely on command line arguments staying
  consistent. If we need a consistent test, we could fork this or add a
  consistent subcommand.
- You should probably have KASAN and slub_debug turned off when running this.
  If you have those on then you're not doing a real test of the memory system
  and shouldn't be surprised that it can't keep up.

Examples:
  1. Launch one process per CPU and aim for 500 MB swap left. Re-access
     memory for 70 seconds and then access/modify memory for 90 seconds:

     mmm_donut --free_swap=500 --taste=70 --chew=90
  2. Like #1 but use 200 processes. Note that since by default each
     process will access 1MB at a time we'll probably really end up stopping
     at closer to 300 MB free swap or less (the muncher stops telling
     sub-processes to allocate when free swap is 500 MB, but then any
     outstanding allocations will finish).

     mmm_donut -n200 --free_swap=500 --taste=70 --chew=90
  3. Like #1 but have children allocate 20MB chunks. This will act to
     more quickly allocate memory but will also over-allocate a bit more.
     On a 6-CPU system you might overallocate by 120MB.

     mmm_donut --free_swap=500 --munch_mbs=20 --taste=70 --chew=90
"""

import argparse
import ctypes
import multiprocessing
import numpy
import os
import queue
import subprocess
import sys
import time

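# Bind the libc routines we need via ctypes: valloc() gives page-aligned
# allocations and free() releases them. Declaring argtypes/restype keeps
# ctypes from truncating the 64-bit pointer that valloc() returns.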
libc = ctypes.CDLL('libc.so.6')
libc.free.argtypes = [ctypes.c_void_p]
libc.free.restype = None
libc.valloc.argtypes = [ctypes.c_size_t]
libc.valloc.restype = ctypes.c_void_p

# By default, we'll fill memory with data based on the contents of this
# file. Ideally it should be a big file and fairly representative of
# what we expect memory to contain.
_DEFAULT_FILE_TO_MAP = '/opt/google/chrome/chrome'

_KB = 1024
_MB = _KB * _KB

# For the purpose of this program, a 'word' is 32-bits.
_WORDS_PER_MB = _MB // 4

_PAGESIZE = os.sysconf('SC_PAGESIZE')


class _MemoryMuncher(object):
    """A class for eating memory.

    This class has functions in it for efficiently munching up memory.
    Specifically, it has a few things it can do:

    munch: This will allocate more memory and fill it with data copied from
        a prototype datasource. It will attempt to make this data 'unique'
        by adding a value to each word based on the current PID. Allocating
        is done 1 MB at a time and done with valloc() so we get page-sized
        allocations. Copying / making unique is done with numpy to get
        reasonable efficiency.
    taste: This will re-read memory (1MB at a time) that's already been munched,
        which ought to cause it to get paged in. We read 1 word from each page.
        Again we use numpy which ought to make it somewhat efficient.
    chew: This will attempt to read-modify-write memory (1MB at a time) that's
        already been munched. This ought to have no huge performance difference
        from taste.
    spit: This will release memory allocated by munch.

    Attributes:
        num_mbs_allocated: Number of MB that are currently allocated.
        num_mbs_munched: Number of MB that have been munched in total. Note
            that if you munch something and then spit it out it still counts in
            this number, so munch(30); spit(10); munch(20) => 50.
        num_mbs_tasted: Number of MB that have been tasted in total.
        num_mbs_chewed: Number of MB that have been chewed in total.
    """
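    # Illustrative usage sketch (hypothetical sizes; the worker processes in
    # _thread_main drive this class the same way):
    #
    #   muncher = _MemoryMuncher()
    #   muncher.munch(10)   # allocate 10 MB and fill it from the proto data
    #   muncher.taste(10)   # re-read samples of already-munched memory
    #   muncher.chew(10)    # read-modify-write samples of munched memory
    #   muncher.spit(5)     # free the oldest 5 MB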

    def __init__(self, proto_data=None):
        """Create a MemoryMuncher object.

        Args:
            proto_data: A numpy.memmap array, or None for the default. We'll
                use this as prototype data to copy to our allocated pages.
        """
        if proto_data is None:
            proto_data = numpy.memmap(_DEFAULT_FILE_TO_MAP,
                                      dtype='uint32', mode='r')
        self._proto_data = proto_data
        self._num_proto_mbs = len(self._proto_data) // _WORDS_PER_MB
        self._at_proto_mb = 0

        # Every time we munch through a chunk we'll add this to each integer to
        # make the chunk look unique, then increment it.
        self._unique = os.getpid() << 16

        self._mbs = []
        self._last_accessed_mb = -1

        self.num_mbs_munched = 0
        self.num_mbs_tasted = 0
        self.num_mbs_chewed = 0

    @property
    def num_mbs_allocated(self):
        return len(self._mbs)

    def _alloc_array(self, n, element_type=ctypes.c_uint8):
        """Allocate a numpy array using libc.valloc (page aligned allocation).

        Args:
            n: Number of elements in the array
            element_type: The type of the element (a ctypes type)
        """
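        # valloc() returns page-aligned memory; numpy.ctypeslib.as_array wraps
        # the returned pointer without copying, so the caller gets a normal
        # numpy array backed by that raw allocation (free it via _free_array).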
        ptr = libc.valloc(n * ctypes.sizeof(element_type))
        ptr = ctypes.cast(ptr, ctypes.POINTER(element_type))

        return numpy.ctypeslib.as_array(ptr, shape=(n,))

    def _free_array(self, arr):
        """Free a numpy array allocated with _alloc_array.

        Args:
            arr: The return value from _alloc_array.
        """
        ptr = ctypes.cast(arr, ctypes.c_void_p)
        libc.free(ptr)

    def munch(self, mbs_to_munch, quick_alloc=False):
        """Allocate the given number of mbs, filling the memory with data.

        Args:
            mbs_to_munch: The number of MBs to allocate.
            quick_alloc: If true, we'll try to allocate quicker by not using
                the proto data; we'll just put a unique value in the first
                word of the page.
        """
        for _ in range(mbs_to_munch):
            # Allocate some memory using libc; give back a numpy object
            mb = self._alloc_array(_WORDS_PER_MB, ctypes.c_uint32)

            # Copy data from our proto data making it unique by adding a
            # unique integer to each word.
            mb[0] = self._unique

            if quick_alloc:
                # Don't even bother to zero memory, but put at least something
                # unique per page
                mb.reshape((_PAGESIZE, -1)).T[0] = self._unique
            else:
                # Copy from the next spot in the prototype
                # As we copy, add the unique data based on our PID.
                mb[:] = (self._proto_data[self._at_proto_mb *
                                          _WORDS_PER_MB:
                                          (self._at_proto_mb + 1) *
                                          _WORDS_PER_MB] + self._unique)

                # Update so we're ready for the next time
                self._at_proto_mb += 1
                self._at_proto_mb %= self._num_proto_mbs
            self._unique += 1

            self._mbs.append(mb)
            self.num_mbs_munched += 1

    def spit(self, mbs_to_spit):
        """Spit (free) out the oldest munched memory.

        Args:
            mbs_to_spit: Number of MBs to spit.
        """
        for _ in range(mbs_to_spit):
            if not self._mbs:
                raise RuntimeError('No more memory to spit out')
            self._free_array(self._mbs.pop(0))

    def taste(self, mbs_to_taste):
        """Access memory that we've chewed through, reading 1 word per page.

        Args:
            mbs_to_taste: Number of MBs that we'd like to try to access
        """
        if not self._mbs:
            raise RuntimeError('No memory')

        mb_num = self._last_accessed_mb
        for mb_num in range(mb_num + 1, mb_num + 1 + mbs_to_taste):
            mb_num %= len(self._mbs)
            mb = self._mbs[mb_num]
            self.num_mbs_tasted += 1
            # Fancy numpy to access 1 word from each page
            _ = sum(mb.reshape((-1, _PAGESIZE)).T[0])
        self._last_accessed_mb = mb_num

    def chew(self, mbs_to_chew):
        """Modify memory that we've chewed through, tweaking 1 word per page.

        Args:
            mbs_to_chew: Number of MBs that we'd like to try to access
        """
        if not self._mbs:
            raise RuntimeError('No memory')

        mb_num = self._last_accessed_mb
        for mb_num in range(mb_num + 1, mb_num + 1 + mbs_to_chew):
            mb_num %= len(self._mbs)
            mb = self._mbs[mb_num]
            self.num_mbs_chewed += 1

            # Fancy numpy to access 1 word from each page; we'll invert each
            # time as our modification.
            page_words = mb.reshape((-1, _PAGESIZE)).T[0]
            page_words[:] = ~page_words
        self._last_accessed_mb = mb_num


class _MemInfo(object):
    """An object that makes accessing /proc/meminfo easy.

    When this object is created it will read /proc/meminfo and store all the
    attributes it finds as integer properties. All memory quantities are
    expressed in bytes, so if /proc/meminfo said 'MemFree' was 100 kB then our
    MemFree attribute will be 102400.
    """

    def __init__(self):
        with open('/proc/meminfo', 'r') as f:
            for line in f.readlines():
                name, _, val = line.partition(':')
                num, _, unit = val.strip().partition(' ')
                num = int(num)

                if unit == 'kB':
                    num *= 1024
                elif unit != '':
                    raise RuntimeError('Unexpected meminfo: %s' % line)

                setattr(self, name, num)


def _make_self_oomable():
    """Makes sure that the current process is easily OOMable."""
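    # 1000 is the maximum oom_score_adj, so the kernel's OOM killer will pick
    # these worker processes before anything with a lower score.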
    with open('/proc/self/oom_score_adj', 'w') as f:
        f.write('1000\n')


def _thread_main(task_num, options, cmd_queue, done_queue):
    """The main entry point of the worker processes.

    Workers communicate with the main process through two queues. They get
    commands from the cmd_queue and communicate that they're done by putting
    their task_num on the done_queue.

    Args:
        task_num: The integer ID of this task.
        options: Options created by _parse_options()
        cmd_queue: String commands will be put here by the main process.
        done_queue: We'll put our task_num on this queue when we're done with
            our command.
    """
    _make_self_oomable()

    muncher = _MemoryMuncher()

    munch_mbs = options.munch_mbs
    taste_mbs = options.taste_mbs
    chew_mbs = options.chew_mbs

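    # Command loop: block on cmd_queue for the next command, run it, then
    # report our task_num on done_queue so the parent knows we're idle again.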
    try:
        cmd = None
        while cmd != 'done':
            cmd = cmd_queue.get()
            if cmd == 'status':
                print(('Task %d: allocated %d MB, munched %d MB, ' +
                       'tasted %d MB, chewed %d MB') %
                      (task_num, muncher.num_mbs_allocated,
                       muncher.num_mbs_munched, muncher.num_mbs_tasted,
                       muncher.num_mbs_chewed))
            elif cmd == 'munch':
                muncher.munch(munch_mbs)
            elif cmd == 'taste':
                muncher.taste(taste_mbs)
            elif cmd == 'chew':
                muncher.chew(chew_mbs)

            done_queue.put(task_num)
    except KeyboardInterrupt:
        # Don't yell about keyboard interrupts
        pass
    finally:
        print('Task %d is done' % task_num)
        done_queue.close()
        cmd_queue.close()


class WorkerDeadError(RuntimeError):
    """We throw this when we see that a worker has died."""
    def __init__(self, task_num):
        super(WorkerDeadError, self).__init__('Task %d is dead' % task_num)
        self.task_num = task_num


def _wait_everyone_done(tasks, done_queue, refill_done_queue=True):
    """Wait until all of our workers are done.

    This will wait until all tasks have put their task_num in the done_queue.
    We'll also check to see if any tasks are dead and we'll raise an exception
    if we notice this.

    Args:
        tasks: The list of our worker tasks.
        done_queue: Our done queue
        refill_done_queue: If True then we'll make sure that the done_queue
            has each task number in it when we're done; if False then we'll
            leave the done_queue empty.

    Raises:
        WorkerDeadError: If we notice something has died.
    """
    num_tasks = len(tasks)

    # We want to see every task number report it's done via the done_queue; if
    # things are taking too long we'll poll for dead children.
    done_tasks = set()
    while len(done_tasks) != num_tasks:
        try:
            task_num = done_queue.get(timeout=.5)
            done_tasks.add(task_num)
        except queue.Empty:
            for task_num, task in enumerate(tasks):
                if not task.is_alive():
                    raise WorkerDeadError(task_num)

    assert done_queue.empty()
    if not refill_done_queue:
        return

    # Add everyone back to the done_queue.
    for task_num in range(num_tasks):
        done_queue.put(task_num)


def _end_stage(old_stage_name, tasks, done_queue, cmd_queues):
    """End the given stage and ask workers to print status.

    Args:
        old_stage_name: We'll print this to tell the user we finished this.
        tasks: The list of our worker tasks.
        done_queue: Our done queue
        cmd_queues: A list of all task command queues.
    """
    num_tasks = len(tasks)

    # Wait, but don't refill the queue since we'll get the queue
    # refilled after the workers finish printing their status.
    _wait_everyone_done(tasks, done_queue, refill_done_queue=False)

    print('Done with stage %s' % old_stage_name)

    # Give the system a second to quiesce (TODO: needed?)
    time.sleep(1)

    # We'll throw an extra status update; this will refill the done_queue
    for task_num in range(num_tasks):
        assert cmd_queues[task_num].empty()
        cmd_queues[task_num].put('status')
    _wait_everyone_done(tasks, done_queue)


def _parse_options(args):
    """Parse command line options.

    Args:
        args: sys.argv[1:]

    Returns:
        The parsed options as an argparse.Namespace object.
    """
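    # Default the number of worker tasks to the number of online CPUs, as
    # reported by the `nproc` utility.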
    p = subprocess.Popen(['nproc'], stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    stdout, _ = p.communicate()
    nproc = int(stdout)

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        '-n', '--num_tasks', type=int, default=nproc,
        help='Number of tasks to use (default: %(default)s)'
    )
    parser.add_argument(
        '-z', '--munch_mbs', type=int, default=1,
        help='Munch this many MB at a time (default: %(default)s)'
    )
    parser.add_argument(
        '-s', '--free_swap', type=int, default=500,
        help='Stop munching when free swap <= this many MB ' +
             '(default: %(default)s)'
    )
    parser.add_argument(
        '-t', '--taste', type=int, default=30,
        help='Taste for this many seconds (default: %(default)s)'
    )
    parser.add_argument(
        '-T', '--taste_mbs', type=int, default=-1,
        help='Taste this many MB at a time (default: use munch_mbs)'
    )
    parser.add_argument(
        '-c', '--chew', type=int, default=30,
        help='Chew for this many seconds (default: %(default)s)'
    )
    parser.add_argument(
        '-C', '--chew_mbs', type=int, default=-1,
        help='Chew this many MB at a time (default: use munch_mbs)'
    )
    parser.add_argument(
        '-F', '--memfree_sleep', type=int, default=0,
        help='Sleep when memfree is < this many MB (default: %(default)s)'
    )

    options = parser.parse_args(args)

    if options.taste_mbs == -1:
        options.taste_mbs = options.munch_mbs
    if options.chew_mbs == -1:
        options.chew_mbs = options.munch_mbs

    return options


def main(args):
    options = _parse_options(args)

    num_tasks = options.num_tasks

    done_queue = multiprocessing.Queue()
    cmd_queues = [multiprocessing.Queue() for task_num in range(num_tasks)]
    tasks = [
        multiprocessing.Process(
            target=_thread_main,
            args=(task_num, options, cmd_queues[task_num], done_queue)
        )
        for task_num in range(num_tasks)
    ]
    for task in tasks:
        task.start()

    print('Starting test.')
    for task_num in range(num_tasks):
        cmd_queues[task_num].put('status')
    _wait_everyone_done(tasks, done_queue)

    try:
        print('Munching till swap < %d MB free; munch %d MB at a time.' %
              (options.free_swap, options.munch_mbs))
        while True:
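            # Re-check memory pressure before handing out more work: stop once
            # free swap drops below the target, and back off while MemFree is
            # below the --memfree_sleep threshold.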
            meminfo = _MemInfo()
            if meminfo.SwapFree < options.free_swap * _MB:
                break
            if meminfo.MemFree < options.memfree_sleep * _MB:
                print('MemFree only %d MB; sleeping' % (meminfo.MemFree / _MB))
                time.sleep(1)
                continue
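            # done_queue yields the task_num of a worker that has finished its
            # previous command; tell it to munch another chunk.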
            task_num = done_queue.get()
            cmd_queues[task_num].put('munch')
        _end_stage('munch', tasks, done_queue, cmd_queues)

        print('Tasting for %d seconds; taste %d MB at a time.' %
              (options.taste, options.taste_mbs))
        end_time = time.time() + options.taste
        while time.time() < end_time:
            task_num = done_queue.get()
            cmd_queues[task_num].put('taste')
        _end_stage('taste', tasks, done_queue, cmd_queues)

        print('Chewing for %d seconds; chew %d MB at a time.' %
              (options.chew, options.chew_mbs))
        end_time = time.time() + options.chew
        while time.time() < end_time:
            task_num = done_queue.get()
            cmd_queues[task_num].put('chew')
        _end_stage('chew', tasks, done_queue, cmd_queues)

    except KeyboardInterrupt:
        pass
    except WorkerDeadError as error:
        print('ERROR: %s' % str(error))
    finally:
        print('All done I guess; trying to end things nicely.')

        # Throw in a command to try to get them to quit
        for cmd_queue in cmd_queues:
            cmd_queue.put('done')
        for task in tasks:
            task.join(10)
            task.terminate()

        done_queue.close()
        for cmd_queue in cmd_queues:
            cmd_queue.close()

        print('Quitting')

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))