blob: 9b6494a9752a1db681a043a2a17105b29f52f67a [file] [log] [blame]
Douglas Anderson518003b2017-12-21 10:12:40 -08001#!/usr/bin/env python2
2
3# Copyright 2017 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Mad Memory Muncher - Dynamically Organizing Non-Uniprocess Tester.
8
9This program attempts to exercise low memory situations by munching memory
10in a coordinated way across several processes.
11
12Specifically, there is a single controlling process that keeps communication
13channels open to subprocesses so that all processes can start and stop various
14parts of the test at the same time. This tester also has various clever ways
15to access memory.
16
17The main modes are: munch (allocate memory), taste (re-read already
18allocated memory), and chew (modify already allocated memory). Whenever
19possible we try to put some sane values into memory so that any memory
20compression will behave in a real-world-like way.
21
22At the moment this program always makes sure that all of the child
23sub-processes are set to have an OOM score of 1000 (easy to kill them) and
24the parent has a default OOM score (unkillable on Chrome OS). At various
25checkpoints in the test the parent looks for dead children and stops the test.
26
27NOTES:
- The way this program works is subject to change depending on the needs
29 of people stressing memory. Don't rely on command line arguments staying
30 consistent. If we need a consistent test, we could fork this or add a
31 consistent subcommand.
32- You should probably have KASAN and slub_debug turned off when running this.
33 If you have those on then you're not doing a real test of the memory system
34 and shouldn't be surprised that it can't keep up.
35
36Examples:
37 1. Launch one process per CPU and aim for 500 MB swap left. Re-access
38 memory for 70 seconds and then access/modify memory for 90 seconds:
39
40 mmm_donut --free_swap=500 --taste=70 --chew=90
41 2. Like #1 but use 200 processes. Note that since by default each
42 process will access 1MB at a time we'll probably really end up stopping
43 at closer to 300 MB free swap or less (the muncher stops telling
44 sub-processes to allocate when free swap is 500 MB, but then any
      outstanding allocations will finish).
46
47 mmm_donut -n200 --free_swap=500 --taste=70 --chew=90
48 3. Like #1 but have children allocate 20MB chunks. This will act to
49 more quickly allocate memory but will also over-allocate a bit more.
50 On a 6-CPU system you might overallocate by 120MB.
51
52 mmm_donut --free_swap=500 --munch_mbs=20 --taste=70 --chew=90
53"""
54
55
56from __future__ import print_function
57
58import argparse
59import ctypes
60import multiprocessing
61import numpy
62import os
63import Queue
64import subprocess
65import sys
66import time
67
# Grab libc so we can call valloc()/free() directly for page-aligned
# allocations that bypass Python's allocator.
libc = ctypes.CDLL('libc.so.6')
# Declare prototypes so ctypes marshals pointers/sizes as full-width
# types (the default C int would truncate addresses on 64-bit systems).
libc.free.argtypes = [ctypes.c_void_p]
libc.free.restype = None
libc.valloc.argtypes = [ctypes.c_size_t]
libc.valloc.restype = ctypes.c_void_p

# By default, we'll fill memory with data based on the contents of this
# file. Ideally it should be a big file and fairly representative of
# what we expect memory to contain.
_DEFAULT_FILE_TO_MAP = '/opt/google/chrome/chrome'

_KB = 1024
_MB = _KB * _KB

# For the purpose of this program, a 'word' is 32-bits.
_WORDS_PER_MB = _MB / 4

# System page size in bytes (typically 4096).
_PAGESIZE = os.sysconf('SC_PAGESIZE')
87
class _MemoryMuncher(object):
    """A class for eating memory.

    This class has functions in it for efficiently munching up memory.
    Specifically, it has a few things it can do:

    munch: This will allocate more memory and fill it with data copied from
        a prototype datasource. It will attempt to make this data 'unique'
        by adding a value to each word based on the current PID. Allocating
        is done 1 MB at a time and done with valloc() so we get page-sized
        allocations. Copying / making unique is done with numpy to get
        reasonable efficiency.
    taste: This will re-read memory (1MB at a time) that's already been
        munched, which ought to cause it to get paged in. We read 1 word
        from each page. Again we use numpy which ought to make it somewhat
        efficient.
    chew: This will read-modify-write memory (1MB at a time) that's already
        been munched. This ought to have no huge performance difference
        than taste.
    spit: This will release memory allocated by munch.

    Attributes:
        num_mbs_allocated: Number of MB that are currently allocated.
        num_mbs_munched: Number of MB that have been munched in total. Note
            that if you munch something and then spit it out it still counts
            in this number, so munch(30); spit(10); munch(20) => 50.
        num_mbs_tasted: Number of MB that have been tasted in total.
        num_mbs_chewed: Number of MB that have been chewed in total.
    """

    # Number of 32-bit words in one page. NOTE: _PAGESIZE is in *bytes*
    # while our arrays hold 4-byte words; using _PAGESIZE directly as a
    # word count would touch only 1 word per 4 pages instead of 1 per page.
    _WORDS_PER_PAGE = _PAGESIZE // 4

    def __init__(self, proto_data=None):
        """Create a MemoryMuncher object.

        Args:
            proto_data: A numpy.memmap array (or any uint32 numpy array) at
                least 1 MB long, or None for the default. We'll use this as
                prototype data to copy to our allocated pages.

        Raises:
            ValueError: If proto_data is shorter than 1 MB.
        """
        # 'not proto_data' would raise for a real numpy array (ambiguous
        # truth value), so compare against None explicitly.
        if proto_data is None:
            proto_data = numpy.memmap(_DEFAULT_FILE_TO_MAP,
                                      dtype='uint32', mode='r')
        self._proto_data = proto_data
        self._num_proto_mbs = len(self._proto_data) // _WORDS_PER_MB
        if not self._num_proto_mbs:
            # Guard against a modulo-by-zero later in munch().
            raise ValueError('Prototype data must be at least 1 MB')
        self._at_proto_mb = 0

        # Every time we munch through a chunk we'll add this to each integer
        # to make the chunk look unique, then increment it.
        self._unique = os.getpid() << 16

        # List of 1 MB numpy arrays, oldest first.
        self._mbs = []
        self._last_accessed_mb = -1

        self.num_mbs_munched = 0
        self.num_mbs_tasted = 0
        self.num_mbs_chewed = 0

    @property
    def num_mbs_allocated(self):
        """Number of MB currently allocated (munched and not yet spit)."""
        return len(self._mbs)

    def _alloc_array(self, n, element_type=ctypes.c_uint8):
        """Allocate a numpy array using libc.valloc (page aligned allocation).

        Args:
            n: Number of elements in the array
            element_type: The type of the element (a ctypes type)

        Returns:
            A numpy array of n elements wrapping the allocation; release it
            with _free_array().

        Raises:
            MemoryError: If valloc() fails.
        """
        num_bytes = n * ctypes.sizeof(element_type)
        ptr = libc.valloc(num_bytes)
        if not ptr:
            # Don't let a NULL pointer propagate into numpy; fail loudly.
            raise MemoryError('valloc(%d) failed' % num_bytes)
        ptr = ctypes.cast(ptr, ctypes.POINTER(element_type))

        return numpy.ctypeslib.as_array(ptr, shape=(n,))

    def _free_array(self, arr):
        """Free a numpy array allocated with _alloc_array

        Args:
            arr: The return value from _alloc_array.
        """
        ptr = ctypes.cast(arr, ctypes.c_void_p)
        libc.free(ptr)

    def munch(self, mbs_to_munch, quick_alloc=False):
        """Allocate the given number of mbs, filling the memory with data.

        Args:
            mbs_to_munch: The number of MBs to allocate.
            quick_alloc: If true, we'll try to allocate quicker by not using
                the proto data; we'll just put a unique value in the first
                word of each page.
        """
        for _ in xrange(mbs_to_munch):
            # Allocate some memory using libc; gives back a numpy object.
            mb = self._alloc_array(_WORDS_PER_MB, ctypes.c_uint32)
            # View the chunk as one row per page.
            pages = mb.reshape((-1, self._WORDS_PER_PAGE))

            if quick_alloc:
                # Don't even bother to zero memory, but put at least
                # something unique in the first word of each page.
                pages.T[0] = self._unique
            else:
                # Copy from the next spot in the prototype.
                # As we copy, add the unique data based on our PID.
                start = self._at_proto_mb * _WORDS_PER_MB
                mb[:] = (self._proto_data[start:start + _WORDS_PER_MB] +
                         self._unique)

                # Advance (and wrap) so we copy fresh data next time.
                self._at_proto_mb = ((self._at_proto_mb + 1) %
                                     self._num_proto_mbs)

            # Bump for every chunk (quick_alloc ones included) so no two
            # chunks contain identical data.
            self._unique += 1

            self._mbs.append(mb)
            self.num_mbs_munched += 1

    def spit(self, mbs_to_spit):
        """Spit (free) out the oldest munched memory.

        Args:
            mbs_to_spit: Number of MBs to spit.

        Raises:
            RuntimeError: If asked to spit more than is allocated.
        """
        for _ in xrange(mbs_to_spit):
            if not self._mbs:
                raise RuntimeError('No more memory to spit out')
            self._free_array(self._mbs.pop(0))

    def taste(self, mbs_to_taste):
        """Access memory that we've munched through, reading 1 word per page.

        Args:
            mbs_to_taste: Number of MBs that we'd like to try to access

        Raises:
            RuntimeError: If nothing has been munched yet.
        """
        if not self._mbs:
            raise RuntimeError('No memory')

        mb_num = self._last_accessed_mb
        for mb_num in xrange(mb_num + 1, mb_num + 1 + mbs_to_taste):
            # Wrap so we keep cycling over whatever is allocated.
            mb_num %= len(self._mbs)
            mb = self._mbs[mb_num]
            self.num_mbs_tasted += 1
            # Fancy numpy to read 1 word from each page.
            _ = sum(mb.reshape((-1, self._WORDS_PER_PAGE)).T[0])
            self._last_accessed_mb = mb_num

    def chew(self, mbs_to_chew):
        """Modify memory that we've munched through, tweaking 1 word per page.

        Args:
            mbs_to_chew: Number of MBs that we'd like to try to access

        Raises:
            RuntimeError: If nothing has been munched yet.
        """
        if not self._mbs:
            raise RuntimeError('No memory')

        mb_num = self._last_accessed_mb
        for mb_num in xrange(mb_num + 1, mb_num + 1 + mbs_to_chew):
            mb_num %= len(self._mbs)
            mb = self._mbs[mb_num]
            self.num_mbs_chewed += 1

            # Actually write this time: invert 1 word on each page so the
            # page is dirtied (a pure read wouldn't make chew any different
            # from taste).
            pages = mb.reshape((-1, self._WORDS_PER_PAGE))
            pages.T[0] = ~pages.T[0]
            self._last_accessed_mb = mb_num
252
253
254class _MemInfo(object):
255 """An object that makes accessing /proc/meminfo easy.
256
257 When this object is created it will read /proc/meminfo and store all the
258 attributes it finds as integer properties. All memory quantities are
259 expressed in bytes, so if /proc/meminfo said 'MemFree' was 100 kB then our
260 MemFree attribute will be 102400.
261 """
262
263 def __init__(self):
264 with open('/proc/meminfo', 'r') as f:
265 for line in f.readlines():
266 name, _, val = line.partition(':')
267 num, _, unit = val.strip().partition(' ')
268 num = int(num)
269
270 if unit == 'kB':
271 num *= 1024
272 elif unit != '':
273 raise RuntimeError('Unexpected meminfo: %s' % line)
274
275 setattr(self, name, num)
276
277
def _make_self_oomable():
    """Makes sure that the current process is easily OOMable."""
    # 1000 is the maximum oom_score_adj: first in line for the OOM killer.
    with open('/proc/self/oom_score_adj', 'w') as oom_file:
        oom_file.write('1000\n')
282
283
def _thread_main(task_num, options, cmd_queue, done_queue):
    """The main entry point of the worker threads.

    Threads communicate with the main thread through two queues. They get
    commands from the cmd_queue and communicate that they're done by putting
    their task_num on the done_queue.

    Args:
        task_num: The integer ID of this task.
        options: Options created by _parse_options()
        cmd_queue: String commands will be put here by the main thread.
        done_queue: We'll put our task_num on this queue when we're done with
            our command.
    """
    # Children should be the first to die if the system runs out of memory.
    _make_self_oomable()

    muncher = _MemoryMuncher()

    munch_mbs = options.munch_mbs
    taste_mbs = options.taste_mbs
    chew_mbs = options.chew_mbs

    try:
        cmd = None
        while cmd != 'done':
            cmd = cmd_queue.get()
            if cmd == 'status':
                print(('Task %d: allocated %d MB, munched %d MB, ' +
                       'tasted %d MB, chewed %d MB') %
                      (task_num, muncher.num_mbs_allocated,
                       muncher.num_mbs_munched, muncher.num_mbs_tasted,
                       muncher.num_mbs_chewed))
            elif cmd == 'munch':
                muncher.munch(munch_mbs)
            elif cmd == 'taste':
                # BUGFIX: taste/chew sizes used to be swapped here (taste
                # got chew_mbs and vice versa), so -T/-C tuned the wrong
                # stage.
                muncher.taste(taste_mbs)
            elif cmd == 'chew':
                muncher.chew(chew_mbs)

            # Report completion of every command (including 'done').
            done_queue.put(task_num)
    except KeyboardInterrupt:
        # Don't yell about keyboard interrupts
        pass
    finally:
        print('Task %d is done' % task_num)
        done_queue.close()
        cmd_queue.close()
331
332
class WorkerDeadError(RuntimeError):
    """Raised when we notice that one of our worker processes has died.

    Attributes:
        task_num: The integer ID of the dead task.
    """

    def __init__(self, task_num):
        message = 'Task %d is dead' % task_num
        super(WorkerDeadError, self).__init__(message)
        self.task_num = task_num
338
339
def _wait_everyone_done(tasks, done_queue, refill_done_queue=True):
    """Wait until all of our workers are done.

    This will wait until all tasks have put their task_num in the done_queue.
    We'll also check to see if any tasks are dead and we'll raise an exception
    if we notice this.

    Args:
        tasks: The list of our worker tasks.
        done_queue: Our done queue
        refill_done_queue: If True then we'll make sure that the done_queue
            has each task number in it when we're done; if False then we'll
            leave the done_queue empty.

    Raises:
        WorkerDeadError: If we notice something has died.
    """
    num_tasks = len(tasks)

    # Collect a completion report from every task; whenever half a second
    # passes with no report, poll for dead children.
    finished = set()
    while len(finished) != num_tasks:
        try:
            finished.add(done_queue.get(timeout=.5))
        except Queue.Empty:
            for task_num, task in enumerate(tasks):
                if not task.is_alive():
                    raise WorkerDeadError(task_num)

    assert done_queue.empty()
    if refill_done_queue:
        # Add everyone back to the done_queue.
        for task_num in xrange(num_tasks):
            done_queue.put(task_num)
378
379
def _end_stage(old_stage_name, tasks, done_queue, cmd_queues):
    """End the given stage and ask workers to print status.

    Args:
        old_stage_name: We'll print this to tell the user we finished this.
        tasks: The list of our worker tasks.
        done_queue: Our done queue
        cmd_queues: A list of all task command queues.
    """
    num_tasks = len(tasks)

    # Drain completions without refilling; the 'status' round below will
    # refill the done_queue as the workers report back.
    _wait_everyone_done(tasks, done_queue, refill_done_queue=False)

    print('Done with stage %s' % old_stage_name)

    # Give the system a second to quiesce (TODO: needed?)
    time.sleep(1)

    # Throw in an extra status update; this refills the done_queue.
    for task_num in xrange(num_tasks):
        cmd_queue = cmd_queues[task_num]
        assert cmd_queue.empty()
        cmd_queue.put('status')
    _wait_everyone_done(tasks, done_queue)
405
406
407def _parse_options(args):
408 """Parse command line options.
409
410 Args:
411 args: sys.argv[1:]
412
413 Returns:
414 An argparse.ArgumentParser object.
415 """
416 p = subprocess.Popen(['nproc'], stdout=subprocess.PIPE,
417 stderr=subprocess.STDOUT)
418 stdout, _ = p.communicate()
419 nproc = int(stdout)
420
421 parser = argparse.ArgumentParser(
422 description=__doc__,
423 formatter_class=argparse.RawDescriptionHelpFormatter
424 )
425 parser.add_argument(
426 '-n', '--num_tasks', type=int, default=nproc,
427 help='Number of tasks to use (default: %(default)s)'
428 )
429 parser.add_argument(
430 '-z', '--munch_mbs', type=int, default=1,
431 help='Munch this many MB at a time (default: %(default)s)'
432 )
433 parser.add_argument(
434 '-s', '--free_swap', type=int, default=500,
435 help='Stop munching when free swap <= this many MB ' +
436 '(default: %(default)s)'
437 )
438 parser.add_argument(
439 '-t', '--taste', type=int, default=30,
440 help='Taste for this many seconds (default: %(default)s)'
441 )
442 parser.add_argument(
443 '-T', '--taste_mbs', type=int, default=-1,
444 help='Taste this many MB at a time (default: use munch_mbs)'
445 )
446 parser.add_argument(
447 '-c', '--chew', type=int, default=30,
448 help='Chew for this many seconds (default: %(default)s)'
449 )
450 parser.add_argument(
451 '-C', '--chew_mbs', type=int, default=-1,
452 help='Chew this many MB at a time (default: use munch_mbs)'
453 )
454 parser.add_argument(
455 '-F', '--memfree_sleep', type=int, default=0,
456 help='Sleep when memfree is < this many MB (default: %(default)s)'
457 )
458
459 options = parser.parse_args(args)
460
461 if options.taste_mbs == -1:
462 options.taste_mbs = options.munch_mbs
463 if options.chew_mbs == -1:
464 options.chew_mbs = options.munch_mbs
465
466 return options
467
468
def main(args):
    """Entry point: drive workers through munch, taste, and chew stages.

    Args:
        args: sys.argv[1:]

    Returns:
        Process exit code (always 0).
    """
    options = _parse_options(args)

    num_tasks = options.num_tasks

    # Workers report completion on the shared done_queue; each worker has
    # its own command queue so stages can be driven per-task.
    done_queue = multiprocessing.Queue()
    cmd_queues = [multiprocessing.Queue() for task_num in xrange(num_tasks)]
    tasks = [
        multiprocessing.Process(
            target=_thread_main,
            args=(task_num, options, cmd_queues[task_num], done_queue)
        )
        for task_num in xrange(num_tasks)
    ]
    for task in tasks:
        task.start()

    print('Starting test.')
    # An initial 'status' round seeds the done_queue with every task_num.
    for task_num in xrange(num_tasks):
        cmd_queues[task_num].put('status')
    _wait_everyone_done(tasks, done_queue)

    try:
        print('Munching till swap < %d MB free; munch %d MB at a time.' %
              (options.free_swap, options.munch_mbs))
        while True:
            meminfo = _MemInfo()
            # Stop handing out 'munch' once free swap drops below the
            # threshold (in-flight allocations may still overshoot a bit).
            if meminfo.SwapFree < options.free_swap * _MB:
                break
            # Optionally back off while free memory is very low.
            if meminfo.MemFree < options.memfree_sleep * _MB:
                print('MemFree only %d MB; sleeping' % (meminfo.MemFree / _MB))
                time.sleep(1)
                continue
            # Hand the next idle worker another chunk to allocate.
            task_num = done_queue.get()
            cmd_queues[task_num].put('munch')
        _end_stage('munch', tasks, done_queue, cmd_queues)

        print('Tasting for %d seconds; taste %d MB at a time.' %
              (options.taste, options.taste_mbs))
        end_time = time.time() + options.taste
        while time.time() < end_time:
            task_num = done_queue.get()
            cmd_queues[task_num].put('taste')
        _end_stage('taste', tasks, done_queue, cmd_queues)

        print('Chewing for %d seconds; chew %d MB at a time.' %
              (options.chew, options.chew_mbs))
        end_time = time.time() + options.chew
        while time.time() < end_time:
            task_num = done_queue.get()
            cmd_queues[task_num].put('chew')
        _end_stage('chew', tasks, done_queue, cmd_queues)

    except KeyboardInterrupt:
        pass
    except WorkerDeadError as error:
        # A child was likely OOM-killed; wind the test down.
        print('ERROR: %s' % str(error))
    finally:
        print('All done I guess; trying to end things nicely.')

        # Throw in a command to try to get them to quit
        for cmd_queue in cmd_queues:
            cmd_queue.put('done')
        # Give each child 10 seconds to exit cleanly, then force it.
        for task in tasks:
            task.join(10)
            task.terminate()

        done_queue.close()
        for cmd_queue in cmd_queues:
            cmd_queue.close()

        print('Quitting')

    return 0
543
544
if __name__ == '__main__':
    # Strip the program name; main() gets just the arguments.
    sys.exit(main(sys.argv[1:]))
547