Blame - tools_webrtc/perf/catapult_uploader.py - webrtc.googlesource.com/src

blob: d07c287f28285d685a6ca8ae3690042b96e3d1fc [file] [log] [blame]

Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	1	#!/usr/bin/env vpython3
				2
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	3	# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
				4	#
				5	# Use of this source code is governed by a BSD-style license
				6	# that can be found in the LICENSE file in the root of the source
				7	# tree. An additional intellectual property rights grant can be found
				8	# in the file PATENTS. All contributing project authors may
				9	# be found in the AUTHORS file in the root of the source tree.
				10
Andrey Logvin	728b5d0	2020-11-11 17:16:26 +0000	[diff] [blame]	11	import datetime
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	12	import json
				13	import subprocess
Andrey Logvin	728b5d0	2020-11-11 17:16:26 +0000	[diff] [blame]	14	import time
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	15	import zlib
				16
Jeremy Leconte	4fc9bd9	2022-03-18 10:21:07 +0100	[diff] [blame]	17	from typing import Optional
Jeremy Leconte	2c4a447	2022-03-14 15:22:37 +0100	[diff] [blame]	18	import dataclasses
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	19	import httplib2
				20
Patrik Höglund	620bed1	2020-03-17 09:59:10 +0100	[diff] [blame]	21	from tracing.value import histogram
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	22	from tracing.value import histogram_set
				23	from tracing.value.diagnostics import generic_set
				24	from tracing.value.diagnostics import reserved_infos
				25
				26
@dataclasses.dataclass
class UploaderOptions:
  """Settings describing a single perf-metrics upload.

  Attributes:
    perf_dashboard_machine_group: The "master" the bots are grouped under.
      This string is the group in the perf dashboard path
      group/bot/perf_id/metric/subtest.
    bot: The bot running the test (e.g. webrtc-win-large-tests).
    test_suite: The key for the test in the dashboard (i.e. what you select
      in the top-level test suite selector in the dashboard).
    webrtc_git_hash: webrtc.googlesource.com commit hash.
    commit_position: Commit pos corresponding to the git hash.
    build_page_url: URL to the build page for this build.
    dashboard_url: Which dashboard to use.
    input_results_file: A HistogramSet proto file coming from WebRTC tests.
    output_json_file: Where to write the output (for debugging). Nothing is
      written when left as None.
    wait_timeout_sec: Maximum amount of time the script will wait for the
      confirmation. Despite the `_sec` suffix this is a datetime.timedelta.
    wait_polling_period_sec: Status will be requested from the Dashboard
      once per this period. Also a datetime.timedelta.
  """
  perf_dashboard_machine_group: str
  bot: str
  test_suite: str
  webrtc_git_hash: str
  commit_position: int
  build_page_url: str
  dashboard_url: str
  input_results_file: str
  output_json_file: Optional[str] = dataclasses.field(default=None)
  wait_timeout_sec: datetime.timedelta = dataclasses.field(
      default=datetime.timedelta(minutes=20))
  wait_polling_period_sec: datetime.timedelta = dataclasses.field(
      default=datetime.timedelta(minutes=2))
				60
				61
def _GenerateOauthToken():
  """Obtains an OAuth token by running `luci-auth token`.

  Returns:
    The command's standard output with surrounding whitespace stripped.

  Raises:
    RuntimeError: if the command exits with a non-zero status. The message
      includes whatever the command wrote to stdout and stderr.
  """
  args = ['luci-auth', 'token']
  p = subprocess.Popen(args,
                       universal_newlines=True,
                       stdout=subprocess.PIPE,
                       stderr=subprocess.PIPE)
  # communicate() drains both pipes while waiting for the child and closes
  # them afterwards. Calling wait() first and read() later can deadlock once
  # the child fills an OS pipe buffer.
  stdout, stderr = p.communicate()
  if p.returncode == 0:
    return stdout.strip()
  raise RuntimeError(
      'Error generating authentication token.\nStdout: %s\nStderr:%s' %
      (stdout, stderr))
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	74
				75
def _CreateHeaders(oauth_token):
  """Builds the HTTP headers carrying the given OAuth bearer token."""
  bearer_value = 'Bearer %s' % oauth_token
  return dict(Authorization=bearer_value)
Andrey Logvin	bce02a9	2020-11-24 10:04:50 +0000	[diff] [blame]	78
				79
				80	def _SendHistogramSet(url, histograms):
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	81	"""Make a HTTP POST with the given JSON to the Performance Dashboard.
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	82
Andrey Logvin	728b5d0	2020-11-11 17:16:26 +0000	[diff] [blame]	83	Args:
				84	url: URL of Performance Dashboard instance, e.g.
				85	"https://chromeperf.appspot.com".
				86	histograms: a histogram set object that contains the data to be sent.
Andrey Logvin	728b5d0	2020-11-11 17:16:26 +0000	[diff] [blame]	87	"""
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	88	headers = _CreateHeaders(_GenerateOauthToken())
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	89
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	90	serialized = json.dumps(_ApplyHacks(histograms.AsDicts()), indent=4)
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	91
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	92	if url.startswith('http://localhost'):
				93	# The catapult server turns off compression in developer mode.
				94	data = serialized
				95	else:
Christoffer Jansson	1b083a9	2022-02-15 14:52:31 +0100	[diff] [blame]	96	data = zlib.compress(serialized.encode('utf-8'))
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	97
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	98	print('Sending %d bytes to %s.' % (len(data), url + '/add_histograms'))
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	99
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	100	http = httplib2.Http()
				101	response, content = http.request(url + '/add_histograms',
				102	method='POST',
				103	body=data,
				104	headers=headers)
				105	return response, content
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	106
				107
def _WaitForUploadConfirmation(url, upload_token, wait_timeout,
                               wait_polling_period):
  """Polls the Performance Dashboard until the upload status is known or
  the time is out.

  The first poll happens one polling period after the call. A 403 response
  triggers a single OAuth token refresh; any other non-200 response stops
  the polling.

  Args:
    url: URL of Performance Dashboard instance, e.g.
      "https://chromeperf.appspot.com".
    upload_token: String that identifies Performance Dashboard and can be used
      for the status check.
    wait_timeout: (datetime.timedelta) Maximum time to wait for the
      confirmation.
    wait_polling_period: (datetime.timedelta) Performance Dashboard will be
      polled every wait_polling_period amount of time.

  Returns:
    A (response, resp_json) pair: the last HTTP response received and the
    JSON decoded from the most recent 200 response. Each is None if no such
    response was obtained.
  """
  assert wait_polling_period <= wait_timeout

  auth_headers = _CreateHeaders(_GenerateOauthToken())
  client = httplib2.Http()

  token_refreshed = False
  last_response = None
  upload_status = None
  now = datetime.datetime.now()
  deadline = now + wait_timeout
  next_poll = now + wait_polling_period
  while datetime.datetime.now() < deadline:
    now = datetime.datetime.now()
    if next_poll > now:
      remaining = next_poll - now
      time.sleep(remaining.total_seconds())
    next_poll = datetime.datetime.now() + wait_polling_period

    last_response, body = client.request(url + '/uploads/' + upload_token,
                                         method='GET',
                                         headers=auth_headers)

    print('Upload state polled. Response: %r.' % body)

    # Refresh the token at most once; a second 403 falls through to the
    # generic non-200 handling below.
    if not token_refreshed and last_response.status == 403:
      print('Oauth token refreshed. Continue polling.')
      auth_headers = _CreateHeaders(_GenerateOauthToken())
      token_refreshed = True
      continue

    if last_response.status != 200:
      break

    upload_status = json.loads(body)
    if upload_status['state'] in ('COMPLETED', 'FAILED'):
      break

  return last_response, upload_status
Andrey Logvin	728b5d0	2020-11-11 17:16:26 +0000	[diff] [blame]	160
				161
# Because of an issue on the Dashboard side, a few measurements out of a large
# set can fail to upload, which leads to the whole upload being marked as
# failed. Check for that, so it doesn't increase the flakiness of our tests.
# TODO(crbug.com/1145904): Remove check after fixed.
def _CheckFullUploadInfo(url, upload_token,
                         min_measurements_amount=50,
                         max_failed_measurements_percent=0.03):
  """Fetches the full upload info (including measurements) from the
  Performance Dashboard and decides whether the upload is acceptable despite
  not having status "COMPLETED".

  Args:
    url: URL of Performance Dashboard instance, e.g.
      "https://chromeperf.appspot.com".
    upload_token: String that identifies Performance Dashboard and can be used
      for the status check.
    min_measurements_amount: minimal amount of measurements that the upload
      should have to start tolerating failures in particular measurements.
    max_failed_measurements_percent: maximal share of failed measurements
      to tolerate. It is compared against a ratio, so 0.03 means 3%.

  Returns:
    True if the upload has at least min_measurements_amount measurements and
    the share of measurements not in state "COMPLETED" is within the
    tolerated limit; False otherwise, including when the dashboard does not
    answer with status 200.
  """
  auth_headers = _CreateHeaders(_GenerateOauthToken())
  client = httplib2.Http()

  info_url = url + '/uploads/' + upload_token + '?additional_info=measurements'
  response, content = client.request(info_url,
                                     method='GET',
                                     headers=auth_headers)

  if response.status != 200:
    print('Failed to reach the dashboard to get full upload info.')
    return False

  upload_info = json.loads(content)
  print('Full upload info: %s.' % json.dumps(upload_info, indent=4))

  if 'measurements' not in upload_info:
    return False

  measurements = upload_info['measurements']
  total_cnt = len(measurements)
  unconfirmed_cnt = sum(1 for m in measurements if m['state'] != 'COMPLETED')

  tolerable = (total_cnt >= min_measurements_amount
               and unconfirmed_cnt / float(total_cnt) <=
               max_failed_measurements_percent)
  if not tolerable:
    return False

  print('Not all measurements were confirmed to upload. '
        'Measurements count: %d, failed to upload or timed out: %d' %
        (total_cnt, unconfirmed_cnt))
  return True
				212
Andrey Logvin	844125c	2020-11-18 22:07:15 +0000	[diff] [blame]	213
# TODO(https://crbug.com/1029452): HACKHACK
# Remove once we have doubles in the proto and handle -infinity correctly.
def _ApplyHacks(dicts):
  """Replaces infinite values in histogram dicts with finite limits.

  Every value in the 'running' and 'sampleValues' lists of each dict that
  equals +/-infinity is replaced by +/-histogram.JS_MAX_VALUE. The dicts are
  modified in place.

  Args:
    dicts: iterable of histogram dicts.

  Returns:
    The same `dicts` object that was passed in.
  """
  positive_inf = float('inf')
  negative_inf = float('-inf')

  def _Clamp(number):
    if number == positive_inf:
      return histogram.JS_MAX_VALUE
    if number == negative_inf:
      return -histogram.JS_MAX_VALUE
    return number

  for entry in dicts:
    for key in ('running', 'sampleValues'):
      if key in entry:
        entry[key] = [_Clamp(number) for number in entry[key]]

  return dicts
Patrik Höglund	457c8cf	2020-03-13 14:43:21 +0100	[diff] [blame]	231
				232
def _LoadHistogramSetFromProto(options):
  """Reads options.input_results_file into a new HistogramSet.

  Args:
    options: object whose input_results_file attribute is the path of the
      proto file to read; the file is opened in binary mode.

  Returns:
    The HistogramSet after importing the file's contents.
  """
  loaded = histogram_set.HistogramSet()
  with open(options.input_results_file, 'rb') as proto_file:
    serialized_proto = proto_file.read()
    loaded.ImportProto(serialized_proto)

  return loaded
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	239
				240
				241	def _AddBuildInfo(histograms, options):
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	242	common_diagnostics = {
				243	reserved_infos.MASTERS: options.perf_dashboard_machine_group,
				244	reserved_infos.BOTS: options.bot,
				245	reserved_infos.POINT_ID: options.commit_position,
				246	reserved_infos.BENCHMARKS: options.test_suite,
				247	reserved_infos.WEBRTC_REVISIONS: str(options.webrtc_git_hash),
				248	reserved_infos.BUILD_URLS: options.build_page_url,
				249	}
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	250
Christoffer Jansson	4e8a773	2022-02-08 09:01:12 +0100	[diff] [blame]	251	for k, v in list(common_diagnostics.items()):
				252	histograms.AddSharedDiagnosticToAllHistograms(k.name,
				253	generic_set.GenericSet([v]))
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	254
				255
				256	def _DumpOutput(histograms, output_file):
Jeremy Leconte	fa577c5	2022-03-14 20:06:11 +0100	[diff] [blame]	257	with open(output_file, 'w') as f:
Jeremy Leconte	2c4a447	2022-03-14 15:22:37 +0100	[diff] [blame]	258	json.dump(_ApplyHacks(histograms.AsDicts()), f, indent=4)
Patrik Höglund	0569a12	2020-03-13 12:26:42 +0100	[diff] [blame]	259
				260
def UploadToDashboardImpl(options):
  """Uploads the histograms described by `options` and waits for the result.

  Loads the input proto, adds build info, optionally dumps the JSON to
  options.output_json_file, sends the data to the dashboard and then polls
  for the upload status.

  Args:
    options: object carrying the UploaderOptions attributes.

  Returns:
    0 if the upload was confirmed (or the dashboard returned no token to
    poll with), 1 if the upload or the status polling failed or timed out.

  Raises:
    RuntimeError: if an OAuth token cannot be generated.
  """
  histograms = _LoadHistogramSetFromProto(options)
  _AddBuildInfo(histograms, options)

  if options.output_json_file:
    _DumpOutput(histograms, options.output_json_file)

  response, content = _SendHistogramSet(options.dashboard_url, histograms)

  if response.status != 200:
    print('Upload failed with %d: %s\n\n%s' %
          (response.status, response.reason, content))
    return 1

  upload_token = json.loads(content).get('token')
  if not upload_token:
    # Adjacent literals are concatenated into one message; the previous
    # comma-separated form printed the repr of a tuple instead.
    print('Received 200 from dashboard. '
          'Not waiting for the upload status confirmation.')
    return 0

  response, resp_json = _WaitForUploadConfirmation(
      options.dashboard_url, upload_token, options.wait_timeout_sec,
      options.wait_polling_period_sec)

  if ((resp_json and resp_json['state'] == 'COMPLETED')
      or _CheckFullUploadInfo(options.dashboard_url, upload_token)):
    print('Upload completed.')
    return 0

  # Both values are None when the wait ended before any poll was made; that
  # case is reported as a timeout below.
  if response is not None and response.status != 200:
    print('Upload status poll failed with %d: %s' %
          (response.status, response.reason))
    return 1

  if resp_json is not None and resp_json['state'] == 'FAILED':
    print('Upload failed.')
    return 1

  # wait_timeout_sec is a datetime.timedelta; convert it so the message
  # really reports seconds rather than an "H:MM:SS" string.
  print('Upload wasn\'t completed in a given time: %s seconds.' %
        options.wait_timeout_sec.total_seconds())
  return 1
Jeremy Leconte	2c4a447	2022-03-14 15:22:37 +0100	[diff] [blame]	302
				303
				304	def UploadToDashboard(options):
				305	try:
				306	exit_code = UploadToDashboardImpl(options)
				307	except RuntimeError as e:
				308	print(e)
Jeremy Leconte	4fc9bd9	2022-03-18 10:21:07 +0100	[diff] [blame]	309	return 1
Jeremy Leconte	2c4a447	2022-03-14 15:22:37 +0100	[diff] [blame]	310	return exit_code