network-auditor: Parser v0
The parser.py program requires an auditd dump file passed as the first
argument. It parses the provided file and outputs the per-process
frequencies of connect() syscall usage. ONLY sockets of type AF_INET or
AF_INET6 are counted.
BUG=b:214428470
TEST=Outputs frequencies on sample dump file. See README.md!
Change-Id: If37a8f417921ec28f8e861d11b6ef31f1c420c9e
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/3471537
Reviewed-by: Zauri Meshveliani‎ <zauri@chromium.org>
Reviewed-by: Ramya Gopalan <ramyagopalan@google.com>
Commit-Queue: Ramya Gopalan <ramyagopalan@google.com>
Tested-by: Ramya Gopalan <ramyagopalan@google.com>
Reviewed-by: Jorge Lucangeli Obes <jorgelo@chromium.org>
Commit-Queue: Zauri Meshveliani‎ <zauri@chromium.org>
diff --git a/contrib/net-auditor-parser/parser.py b/contrib/net-auditor-parser/parser.py
new file mode 100644
index 0000000..dfb9573
--- /dev/null
+++ b/contrib/net-auditor-parser/parser.py
@@ -0,0 +1,214 @@
+# // Copyright 2022 The Chromium OS Authors. All rights reserved.
+# // Use of this source code is governed by a BSD-style license that can be
+# // found in the LICENSE file.
+
+"""Parse auditd logs.
+
+Parse the connect() syscall usage on a per-process basis.
+Print the frequency histogram to stdout.
+
+Example:
+ python3 parser.py data/sample_data.txt
+"""
+
+
+import sys
+
+
+DELIM = '----'  # Separator between events in the log file.
+SYSCALL = 'SYSCALL'  # Log entry type read by parse_syscall_bits().
+SOCKADDR = 'SOCKADDR'  # Log entry type read by parse_sockaddr_bits().
+EVENTID = 'event_id'  # Data point key set by parse_eventid().
+TYPE = 'type'  # Currently unused in this file.
+FAM = 'fam'  # Data point key: socket family, e.g. 'inet'.
+LADDR = 'laddr'  # Data point key, set for inet/inet6 sockets only.
+LPORT = 'lport'  # Data point key, set for inet/inet6 sockets only.
+PID = 'pid'  # Data point key, stored as int.
+PPID = 'ppid'  # Data point key, stored as int.
+UID = 'uid'  # Data point key, kept as a string.
+COMM = 'comm'  # Data point key that visualize() groups the counts by.
+EXE = 'exe'  # Data point key.
+
+IGNORE_ADDR_LIST = ['127.0.0.1', '0.0.0.0', '::', '::1']  # visualize() skips.
+
+
+def main(argv):
+    """Parse the log file and print a connect() frequency histogram.
+
+    Each output line is '<number> <process_name>' (connect() call count).
+
+    Args:
+        argv: Command-line arguments; argv[0] is the path of the log file.
+            When the list is empty, exits with a usage message (SystemExit).
+    """
+    if not argv:  # argv[0] would otherwise die with a bare IndexError.
+        sys.exit('usage: parser.py <auditd_dump_file>')
+    visualize(parse_file(argv[0]))
+
+
+def visualize(data):
+    """Print how often each process called connect(), most frequent first.
+
+    The point is to see what leaves the device, so an event is counted only
+    when it:
+    * carries both the process name and the socket family,
+    * uses an internet protocol socket ('inet' or 'inet6'),
+    * does not target an address from IGNORE_ADDR_LIST.
+
+    Args:
+        data: Iterable of per-event dictionaries, as built by parse_event().
+    """
+    counts = {}
+    for point in data:
+        # Events lacking syscall or sockaddr information cannot be attributed.
+        if COMM not in point or FAM not in point:
+            continue
+        # Non-internet protocols are out of scope.
+        if point[FAM] != 'inet' and point[FAM] != 'inet6':
+            continue
+        # Addresses listed in IGNORE_ADDR_LIST are skipped as well.
+        if point.get(LADDR) in IGNORE_ADDR_LIST:
+            continue
+
+        name = point[COMM]
+        if name not in counts:
+            counts[name] = 0
+        counts[name] += 1
+
+    for name in sorted(counts, key=lambda key: counts[key], reverse=True):
+        print(counts[name], '\t', name)
+
+
+def parse_file(file_name):
+    """Parse a log file and return its events as a list of dictionaries.
+
+    The file is read in one go and split on the DELIM separator that sits
+    between events.
+
+    Args:
+        file_name: Path to the log file.
+
+    Returns:
+        A list with one dictionary (see parse_event) per event in the file.
+    """
+    with open(file_name, 'r') as file:
+        chunks = file.read().split(DELIM)
+
+    # Drop blank chunks, including whitespace-only ones: a delimiter on the
+    # first or last line of the file leaves an empty or newline-only chunk
+    # behind, and such chunks hold no log entry to parse.
+    return [parse_event(chunk) for chunk in chunks if chunk.strip()]
+
+
+def parse_event(event):
+    """Turn the log entries of one event into a data point.
+
+    Every event is logged as several entries, and a '----' delimiter
+    separates one event from the next in the log file. Given all the
+    entries of one event, this function extracts the fields of interest
+    and hands them back as a dictionary.
+
+    Args:
+        event: String with every log entry belonging to one event.
+
+    Returns:
+        A dictionary holding the relevant fields of the event.
+    """
+    fields = {}
+
+    # Each helper adds the fields it can find to the shared dictionary;
+    # the order is: event id, SYSCALL fields, SOCKADDR fields.
+    for populate in (parse_eventid, parse_syscall_bits, parse_sockaddr_bits):
+        populate(event, fields)
+
+    return fields
+
+
+def parse_type(event, log_type):
+    """Return the log entry with the desired log type.
+
+    Args:
+        event: String of all the log entries associated with a single event.
+        log_type: Type of the log entry we're searching for e.g. 'SYSCALL'.
+
+    Returns:
+        The first log entry of the given type (should be EXACTLY one), with
+        trailing whitespace removed. Empty string if the type is not found.
+    """
+    for line in event.split('\n'):
+        tokens = line.split()
+        # Blank and whitespace-only lines have no tokens; indexing tokens[0]
+        # on them would raise IndexError, so skip them.
+        if not tokens:
+            continue
+        if tokens[0].replace('type=', '') == log_type:
+            return line.rstrip()
+
+    return ''
+
+
+def parse_eventid(event, data_point):
+    """Populate EVENTID from the first 'audit(<timestamp>:<id>)' stamp."""
+    stamp = event.partition('audit(')[2].partition(')')[0]
+    eventid = stamp.rpartition(':')[2]
+    # A chunk without a numeric id (e.g. blank) leaves data_point untouched,
+    # as the sibling parsers do, instead of raising IndexError.
+    if eventid.isdecimal():
+        data_point[EVENTID] = int(eventid)
+
+
+def parse_syscall_bits(event, data_point):
+    """Populate the SYSCALL-related fields into a data point.
+
+    A value runs from its ' name=' marker up to the marker of the field
+    logged after it, so values containing spaces (e.g. comm) stay whole.
+    Does nothing if the event has no SYSCALL entry; a field whose marker
+    is absent is left unset rather than sliced from a find() result of -1.
+    """
+    entry = parse_type(event, SYSCALL)
+
+    # (key, marker, marker of the following field, converter)
+    layout = [
+        (PPID, ' ppid=', ' pid=', int),
+        (PID, ' pid=', ' auid=', int),
+        (UID, ' uid=', ' gid=', str),
+        (COMM, ' comm=', ' exe=', str),
+        (EXE, ' exe=', ' subj=', str),
+    ]
+    for key, marker, following, convert in layout:
+        start = entry.find(marker)
+        if start == -1:
+            continue
+        start += len(marker)
+        end = entry.find(following)
+        # Without the following marker the value extends to the end of line.
+        data_point[key] = convert(entry[start:(end if end != -1 else None)])
+
+
+def parse_sockaddr_bits(event, data_point):
+    """Populate the SOCKADDR-related bits into a data point.
+
+    FAM is set when the event has a SOCKADDR entry with a ' fam=' field;
+    LADDR and LPORT are added for 'inet'/'inet6' sockets only. A value is
+    the token following its marker, so a field that ends the line is not
+    cut short. Raises ValueError if the port of such a socket is not a number.
+    """
+    entry = parse_type(event, SOCKADDR)
+
+    def value_of(marker):
+        """Return the token right after marker, or '' if there is none."""
+        tokens = entry.partition(marker)[2].split(None, 1)
+        return tokens[0] if tokens else ''
+
+    family = value_of(' fam=')
+    if not family:
+        return
+    data_point[FAM] = family
+    # TODO(zauri): do we need non-inet[6] packets?
+    if family in ('inet', 'inet6'):
+        data_point[LADDR] = value_of(' laddr=')
+        data_point[LPORT] = int(value_of(' lport='))
+
+
+if __name__ == '__main__':
+ main(sys.argv[1:])  # Pass only the arguments, without the script name.