blob: 9b6494a9752a1db681a043a2a17105b29f52f67a [file] [log] [blame]
Douglas Anderson518003b2017-12-21 10:12:40 -08001#!/usr/bin/env python2
2
3# Copyright 2017 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Mad Memory Muncher - Dynamically Organizing Non-Uniprocess Tester.
8
9This program attempts to exercise low memory situations by munching memory
10in a coordinated way across several processes.
11
12Specifically, there is a single controlling process that keeps communication
13channels open to subprocesses so that all processes can start and stop various
14parts of the test at the same time. This tester also has various clever ways
15to access memory.
16
17The main modes are: munch (allocate memory), taste (re-read already
18allocated memory), and chew (modify already allocated memory). Whenever
19possible we try to put some sane values into memory so that any memory
20compression will behave in a real-world-like way.
21
22At the moment this program always makes sure that all of the child
23sub-processes are set to have an OOM score of 1000 (easy to kill them) and
24the parent has a default OOM score (unkillable on Chrome OS). At various
25checkpoints in the test the parent looks for dead children and stops the test.
26
27NOTES:
- The way this program works is subject to change depending on the needs
29 of people stressing memory. Don't rely on command line arguments staying
30 consistent. If we need a consistent test, we could fork this or add a
31 consistent subcommand.
32- You should probably have KASAN and slub_debug turned off when running this.
33 If you have those on then you're not doing a real test of the memory system
34 and shouldn't be surprised that it can't keep up.
35
36Examples:
37 1. Launch one process per CPU and aim for 500 MB swap left. Re-access
38 memory for 70 seconds and then access/modify memory for 90 seconds:
39
40 mmm_donut --free_swap=500 --taste=70 --chew=90
41 2. Like #1 but use 200 processes. Note that since by default each
42 process will access 1MB at a time we'll probably really end up stopping
43 at closer to 300 MB free swap or less (the muncher stops telling
44 sub-processes to allocate when free swap is 500 MB, but then any
      outstanding allocations will finish).
46
47 mmm_donut -n200 --free_swap=500 --taste=70 --chew=90
48 3. Like #1 but have children allocate 20MB chunks. This will act to
49 more quickly allocate memory but will also over-allocate a bit more.
50 On a 6-CPU system you might overallocate by 120MB.
51
52 mmm_donut --free_swap=500 --munch_mbs=20 --taste=70 --chew=90
53"""
54
55
56from __future__ import print_function
57
58import argparse
59import ctypes
60import multiprocessing
61import numpy
62import os
63import Queue
64import subprocess
65import sys
66import time
67
# Grab libc so we can call valloc()/free() directly for page-aligned
# allocations that bypass Python's allocator.
libc = ctypes.CDLL('libc.so.6')
# Declare prototypes so ctypes marshals pointers/sizes as full-width
# types (the default C int would truncate addresses on 64-bit systems).
libc.free.argtypes = [ctypes.c_void_p]
libc.free.restype = None
libc.valloc.argtypes = [ctypes.c_size_t]
libc.valloc.restype = ctypes.c_void_p

# By default, we'll fill memory with data based on the contents of this
# file. Ideally it should be a big file and fairly representative of
# what we expect memory to contain.
_DEFAULT_FILE_TO_MAP = '/opt/google/chrome/chrome'

_KB = 1024
_MB = _KB * _KB

# For the purpose of this program, a 'word' is 32-bits.
_WORDS_PER_MB = _MB / 4

# System page size in bytes (typically 4096).
_PAGESIZE = os.sysconf('SC_PAGESIZE')
87
class _MemoryMuncher(object):
    """A class for eating memory.

    This class has functions in it for efficiently munching up memory.
    Specifically, it has a few things it can do:

    munch: This will allocate more memory and fill it with data copied from
        a prototype datasource. It will attempt to make this data 'unique'
        by adding a value to each word based on the current PID. Allocating
        is done 1 MB at a time and done with valloc() so we get page-sized
        allocations. Copying / making unique is done with numpy to get
        reasonable efficiency.
    taste: This will re-read memory (1MB at a time) that's already been
        munched, which ought to cause it to get paged in. We read 1 word
        from each page. Again we use numpy which ought to make it somewhat
        efficient.
    chew: This will read-modify-write memory (1MB at a time) that's already
        been munched. This ought to have no huge performance difference
        than taste.
    spit: This will release memory allocated by munch.

    Attributes:
        num_mbs_allocated: Number of MB that are currently allocated.
        num_mbs_munched: Number of MB that have been munched in total. Note
            that if you munch something and then spit it out it still counts
            in this number, so munch(30); spit(10); munch(20) => 50.
        num_mbs_tasted: Number of MB that have been tasted in total.
        num_mbs_chewed: Number of MB that have been chewed in total.
    """

    # Number of 32-bit words in one page. NOTE: _PAGESIZE is in *bytes*
    # while our arrays hold 4-byte words; using _PAGESIZE directly as a
    # word count would touch only 1 word per 4 pages instead of 1 per page.
    _WORDS_PER_PAGE = _PAGESIZE // 4

    def __init__(self, proto_data=None):
        """Create a MemoryMuncher object.

        Args:
            proto_data: A numpy.memmap array (or any uint32 numpy array) at
                least 1 MB long, or None for the default. We'll use this as
                prototype data to copy to our allocated pages.

        Raises:
            ValueError: If proto_data is shorter than 1 MB.
        """
        # 'not proto_data' would raise for a real numpy array (ambiguous
        # truth value), so compare against None explicitly.
        if proto_data is None:
            proto_data = numpy.memmap(_DEFAULT_FILE_TO_MAP,
                                      dtype='uint32', mode='r')
        self._proto_data = proto_data
        self._num_proto_mbs = len(self._proto_data) // _WORDS_PER_MB
        if not self._num_proto_mbs:
            # Guard against a modulo-by-zero later in munch().
            raise ValueError('Prototype data must be at least 1 MB')
        self._at_proto_mb = 0

        # Every time we munch through a chunk we'll add this to each integer
        # to make the chunk look unique, then increment it.
        self._unique = os.getpid() << 16

        # List of 1 MB numpy arrays, oldest first.
        self._mbs = []
        self._last_accessed_mb = -1

        self.num_mbs_munched = 0
        self.num_mbs_tasted = 0
        self.num_mbs_chewed = 0

    @property
    def num_mbs_allocated(self):
        """Number of MB currently allocated (munched and not yet spit)."""
        return len(self._mbs)

    def _alloc_array(self, n, element_type=ctypes.c_uint8):
        """Allocate a numpy array using libc.valloc (page aligned allocation).

        Args:
            n: Number of elements in the array
            element_type: The type of the element (a ctypes type)

        Returns:
            A numpy array of n elements wrapping the allocation; release it
            with _free_array().

        Raises:
            MemoryError: If valloc() fails.
        """
        num_bytes = n * ctypes.sizeof(element_type)
        ptr = libc.valloc(num_bytes)
        if not ptr:
            # Don't let a NULL pointer propagate into numpy; fail loudly.
            raise MemoryError('valloc(%d) failed' % num_bytes)
        ptr = ctypes.cast(ptr, ctypes.POINTER(element_type))

        return numpy.ctypeslib.as_array(ptr, shape=(n,))

    def _free_array(self, arr):
        """Free a numpy array allocated with _alloc_array

        Args:
            arr: The return value from _alloc_array.
        """
        ptr = ctypes.cast(arr, ctypes.c_void_p)
        libc.free(ptr)

    def munch(self, mbs_to_munch, quick_alloc=False):
        """Allocate the given number of mbs, filling the memory with data.

        Args:
            mbs_to_munch: The number of MBs to allocate.
            quick_alloc: If true, we'll try to allocate quicker by not using
                the proto data; we'll just put a unique value in the first
                word of each page.
        """
        for _ in xrange(mbs_to_munch):
            # Allocate some memory using libc; gives back a numpy object.
            mb = self._alloc_array(_WORDS_PER_MB, ctypes.c_uint32)
            # View the chunk as one row per page.
            pages = mb.reshape((-1, self._WORDS_PER_PAGE))

            if quick_alloc:
                # Don't even bother to zero memory, but put at least
                # something unique in the first word of each page.
                pages.T[0] = self._unique
            else:
                # Copy from the next spot in the prototype.
                # As we copy, add the unique data based on our PID.
                start = self._at_proto_mb * _WORDS_PER_MB
                mb[:] = (self._proto_data[start:start + _WORDS_PER_MB] +
                         self._unique)

                # Advance (and wrap) so we copy fresh data next time.
                self._at_proto_mb = ((self._at_proto_mb + 1) %
                                     self._num_proto_mbs)

            # Bump for every chunk (quick_alloc ones included) so no two
            # chunks contain identical data.
            self._unique += 1

            self._mbs.append(mb)
            self.num_mbs_munched += 1

    def spit(self, mbs_to_spit):
        """Spit (free) out the oldest munched memory.

        Args:
            mbs_to_spit: Number of MBs to spit.

        Raises:
            RuntimeError: If asked to spit more than is allocated.
        """
        for _ in xrange(mbs_to_spit):
            if not self._mbs:
                raise RuntimeError('No more memory to spit out')
            self._free_array(self._mbs.pop(0))

    def taste(self, mbs_to_taste):
        """Access memory that we've munched through, reading 1 word per page.

        Args:
            mbs_to_taste: Number of MBs that we'd like to try to access

        Raises:
            RuntimeError: If nothing has been munched yet.
        """
        if not self._mbs:
            raise RuntimeError('No memory')

        mb_num = self._last_accessed_mb
        for mb_num in xrange(mb_num + 1, mb_num + 1 + mbs_to_taste):
            # Wrap so we keep cycling over whatever is allocated.
            mb_num %= len(self._mbs)
            mb = self._mbs[mb_num]
            self.num_mbs_tasted += 1
            # Fancy numpy to read 1 word from each page.
            _ = sum(mb.reshape((-1, self._WORDS_PER_PAGE)).T[0])
            self._last_accessed_mb = mb_num

    def chew(self, mbs_to_chew):
        """Modify memory that we've munched through, tweaking 1 word per page.

        Args:
            mbs_to_chew: Number of MBs that we'd like to try to access

        Raises:
            RuntimeError: If nothing has been munched yet.
        """
        if not self._mbs:
            raise RuntimeError('No memory')

        mb_num = self._last_accessed_mb
        for mb_num in xrange(mb_num + 1, mb_num + 1 + mbs_to_chew):
            mb_num %= len(self._mbs)
            mb = self._mbs[mb_num]
            self.num_mbs_chewed += 1

            # Actually write this time: invert 1 word on each page so the
            # page is dirtied (a pure read wouldn't make chew any different
            # from taste).
            pages = mb.reshape((-1, self._WORDS_PER_PAGE))
            pages.T[0] = ~pages.T[0]
            self._last_accessed_mb = mb_num
252
253
254class _MemInfo(object):
255 """An object that makes accessing /proc/meminfo easy.
256
257 When this object is created it will read /proc/meminfo and store all the
258 attributes it finds as integer properties. All memory quantities are
259 expressed in bytes, so if /proc/meminfo said 'MemFree' was 100 kB then our
260 MemFree attribute will be 102400.
261 """
262
263 def __init__(self):
264 with open('/proc/meminfo', 'r') as f:
265 for line in f.readlines():
266 name, _, val = line.partition(':')
267 num, _, unit = val.strip().partition(' ')
268 num = int(num)
269
270 if unit == 'kB':
271 num *= 1024
272 elif unit != '':
273 raise RuntimeError('Unexpected meminfo: %s' % line)
274
275 setattr(self, name, num)
276
277
def _make_self_oomable():
    """Makes sure that the current process is easily OOMable."""
    # 1000 is the maximum oom_score_adj: first in line for the OOM killer.
    with open('/proc/self/oom_score_adj', 'w') as oom_file:
        oom_file.write('1000\n')
282
283
def _thread_main(task_num, options, cmd_queue, done_queue):
    """The main entry point of the worker threads.

    Threads communicate with the main thread through two queues. They get
    commands from the cmd_queue and communicate that they're done by putting
    their task_num on the done_queue.

    Args:
        task_num: The integer ID of this task.
        options: Options created by _parse_options()
        cmd_queue: String commands will be put here by the main thread.
        done_queue: We'll put our task_num on this queue when we're done with
            our command.
    """
    # Children should be the first to die if the system runs out of memory.
    _make_self_oomable()

    muncher = _MemoryMuncher()

    munch_mbs = options.munch_mbs
    taste_mbs = options.taste_mbs
    chew_mbs = options.chew_mbs

    try:
        cmd = None
        while cmd != 'done':
            cmd = cmd_queue.get()
            if cmd == 'status':
                print(('Task %d: allocated %d MB, munched %d MB, ' +
                       'tasted %d MB, chewed %d MB') %
                      (task_num, muncher.num_mbs_allocated,
                       muncher.num_mbs_munched, muncher.num_mbs_tasted,
                       muncher.num_mbs_chewed))
            elif cmd == 'munch':
                muncher.munch(munch_mbs)
            elif cmd == 'taste':
                # BUGFIX: taste/chew sizes used to be swapped here (taste
                # got chew_mbs and vice versa), so -T/-C tuned the wrong
                # stage.
                muncher.taste(taste_mbs)
            elif cmd == 'chew':
                muncher.chew(chew_mbs)

            # Report completion of every command (including 'done').
            done_queue.put(task_num)
    except KeyboardInterrupt:
        # Don't yell about keyboard interrupts
        pass
    finally:
        print('Task %d is done' % task_num)
        done_queue.close()
        cmd_queue.close()
331
332
class WorkerDeadError(RuntimeError):
    """Raised when we notice that one of our worker processes has died.

    Attributes:
        task_num: The integer ID of the dead task.
    """

    def __init__(self, task_num):
        message = 'Task %d is dead' % task_num
        super(WorkerDeadError, self).__init__(message)
        self.task_num = task_num
338
339
def _wait_everyone_done(tasks, done_queue, refill_done_queue=True):
    """Wait until all of our workers are done.

    This will wait until all tasks have put their task_num in the done_queue.
    We'll also check to see if any tasks are dead and we'll raise an exception
    if we notice this.

    Args:
        tasks: The list of our worker tasks.
        done_queue: Our done queue
        refill_done_queue: If True then we'll make sure that the done_queue
            has each task number in it when we're done; if False then we'll
            leave the done_queue empty.

    Raises:
        WorkerDeadError: If we notice something has died.
    """
    num_tasks = len(tasks)

    # Collect a completion report from every task; whenever half a second
    # passes with no report, poll for dead children.
    finished = set()
    while len(finished) != num_tasks:
        try:
            finished.add(done_queue.get(timeout=.5))
        except Queue.Empty:
            for task_num, task in enumerate(tasks):
                if not task.is_alive():
                    raise WorkerDeadError(task_num)

    assert done_queue.empty()
    if refill_done_queue:
        # Add everyone back to the done_queue.
        for task_num in xrange(num_tasks):
            done_queue.put(task_num)
378
379
def _end_stage(old_stage_name, tasks, done_queue, cmd_queues):
    """End the given stage and ask workers to print status.

    Args:
        old_stage_name: We'll print this to tell the user we finished this.
        tasks: The list of our worker tasks.
        done_queue: Our done queue
        cmd_queues: A list of all task command queues.
    """
    num_tasks = len(tasks)

    # Drain completions without refilling; the 'status' round below will
    # refill the done_queue as the workers report back.
    _wait_everyone_done(tasks, done_queue, refill_done_queue=False)

    print('Done with stage %s' % old_stage_name)

    # Give the system a second to quiesce (TODO: needed?)
    time.sleep(1)

    # Throw in an extra status update; this refills the done_queue.
    for task_num in xrange(num_tasks):
        cmd_queue = cmd_queues[task_num]
        assert cmd_queue.empty()
        cmd_queue.put('status')
    _wait_everyone_done(tasks, done_queue)
405
406
407def _parse_options(args):
408 """Parse command line options.
409
410 Args:
411 args: sys.argv[1:]
412
413 Returns:
414 An argparse.ArgumentParser object.
415 """
416 p = subprocess.Popen(['nproc'], stdout=subprocess.PIPE,
417 stderr=subprocess.STDOUT)
418 stdout, _ = p.communicate()
419 nproc = int(stdout)
420
421 parser = argparse.ArgumentParser(
422 description=__doc__,
423 formatter_class=argparse.RawDescriptionHelpFormatter
424 )
425 parser.add_argument(
426 '-n', '--num_tasks', type=int, default=nproc,
427 help='Number of tasks to use (default: %(default)s)'
428 )
429 parser.add_argument(
430 '-z', '--munch_mbs', type=int, default=1,
431 help='Munch this many MB at a time (default: %(default)s)'
432 )
433 parser.add_argument(
434 '-s', '--free_swap', type=int, default=500,
435 help='Stop munching when free swap <= this many MB ' +
436 '(default: %(default)s)'
437 )
438 parser.add_argument(
439 '-t', '--taste', type=int, default=30,
440 help='Taste for this many seconds (default: %(default)s)'
441 )
442 parser.add_argument(
443 '-T', '--taste_mbs', type=int, default=-1,
444 help='Taste this many MB at a time (default: use munch_mbs)'
445 )
446 parser.add_argument(
447 '-c', '--chew', type=int, default=30,
448 help='Chew for this many seconds (default: %(default)s)'
449 )
450 parser.add_argument(
451 '-C', '--chew_mbs', type=int, default=-1,
452 help='Chew this many MB at a time (default: use munch_mbs)'
453 )
454 parser.add_argument(
455 '-F', '--memfree_sleep', type=int, default=0,
456 help='Sleep when memfree is < this many MB (default: %(default)s)'
457 )
458
459 options = parser.parse_args(args)
460
461 if options.taste_mbs == -1:
462 options.taste_mbs = options.munch_mbs
463 if options.chew_mbs == -1:
464 options.chew_mbs = options.munch_mbs
465
466 return options
467
468
def main(args):
    """Entry point: drive workers through munch, taste, and chew stages.

    Args:
        args: sys.argv[1:]

    Returns:
        Process exit code (always 0).
    """
    options = _parse_options(args)

    num_tasks = options.num_tasks

    # Workers report completion on the shared done_queue; each worker has
    # its own command queue so stages can be driven per-task.
    done_queue = multiprocessing.Queue()
    cmd_queues = [multiprocessing.Queue() for task_num in xrange(num_tasks)]
    tasks = [
        multiprocessing.Process(
            target=_thread_main,
            args=(task_num, options, cmd_queues[task_num], done_queue)
        )
        for task_num in xrange(num_tasks)
    ]
    for task in tasks:
        task.start()

    print('Starting test.')
    # An initial 'status' round seeds the done_queue with every task_num.
    for task_num in xrange(num_tasks):
        cmd_queues[task_num].put('status')
    _wait_everyone_done(tasks, done_queue)

    try:
        print('Munching till swap < %d MB free; munch %d MB at a time.' %
              (options.free_swap, options.munch_mbs))
        while True:
            meminfo = _MemInfo()
            # Stop handing out 'munch' once free swap drops below the
            # threshold (in-flight allocations may still overshoot a bit).
            if meminfo.SwapFree < options.free_swap * _MB:
                break
            # Optionally back off while free memory is very low.
            if meminfo.MemFree < options.memfree_sleep * _MB:
                print('MemFree only %d MB; sleeping' % (meminfo.MemFree / _MB))
                time.sleep(1)
                continue
            # Hand the next idle worker another chunk to allocate.
            task_num = done_queue.get()
            cmd_queues[task_num].put('munch')
        _end_stage('munch', tasks, done_queue, cmd_queues)

        print('Tasting for %d seconds; taste %d MB at a time.' %
              (options.taste, options.taste_mbs))
        end_time = time.time() + options.taste
        while time.time() < end_time:
            task_num = done_queue.get()
            cmd_queues[task_num].put('taste')
        _end_stage('taste', tasks, done_queue, cmd_queues)

        print('Chewing for %d seconds; chew %d MB at a time.' %
              (options.chew, options.chew_mbs))
        end_time = time.time() + options.chew
        while time.time() < end_time:
            task_num = done_queue.get()
            cmd_queues[task_num].put('chew')
        _end_stage('chew', tasks, done_queue, cmd_queues)

    except KeyboardInterrupt:
        pass
    except WorkerDeadError as error:
        # A child was likely OOM-killed; wind the test down.
        print('ERROR: %s' % str(error))
    finally:
        print('All done I guess; trying to end things nicely.')

        # Throw in a command to try to get them to quit
        for cmd_queue in cmd_queues:
            cmd_queue.put('done')
        # Give each child 10 seconds to exit cleanly, then force it.
        for task in tasks:
            task.join(10)
            task.terminate()

        done_queue.close()
        for cmd_queue in cmd_queues:
            cmd_queue.close()

        print('Quitting')

    return 0
543
544
if __name__ == '__main__':
    # Strip the program name; main() gets just the arguments.
    sys.exit(main(sys.argv[1:]))
547