Matt Tennant | f34162f | 2011-06-08 17:24:09 -0700 | [diff] [blame] | 1 | #!/usr/bin/python2.6 |
| 2 | # Copyright (c) 2011 The Chromium OS Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Merge multiple csv files representing Portage package data into |
| 7 | one csv file, in preparation for uploading to a Google Docs spreadsheet. |
| 8 | """ |
| 9 | |
| 10 | import optparse |
| 11 | import os |
Matt Tennant | 639b6f2 | 2011-07-15 17:11:22 -0700 | [diff] [blame] | 12 | import re |
Matt Tennant | f34162f | 2011-06-08 17:24:09 -0700 | [diff] [blame] | 13 | |
Brian Harring | 503f3ab | 2012-03-09 21:39:41 -0800 | [diff] [blame] | 14 | from chromite.lib import operation |
| 15 | from chromite.lib import table |
| 16 | from chromite.lib import upgrade_table as utable |
Matt Tennant | f34162f | 2011-06-08 17:24:09 -0700 | [diff] [blame] | 17 | |
# Local aliases for the column names defined by the upgrade table class.
COL_PACKAGE = utable.UpgradeTable.COL_PACKAGE
COL_SLOT = utable.UpgradeTable.COL_SLOT
COL_TARGET = utable.UpgradeTable.COL_TARGET
COL_OVERLAY = utable.UpgradeTable.COL_OVERLAY
# Columns used to match rows when merging tables and to sort the merged table.
ID_COLS = [COL_PACKAGE, COL_SLOT]

# Module-wide helper used below for notices, errors and fatal exits.
oper = operation.Operation('merge_package_status')
Matt Tennant | ffed1d5 | 2011-07-21 10:01:13 -0700 | [diff] [blame] | 25 | |
Matt Tennant | f34162f | 2011-06-08 17:24:09 -0700 | [diff] [blame] | 26 | # A bit of hard-coding with knowledge of how cros targets work. |
| 27 | CHROMEOS_TARGET_ORDER = ['chromeos', 'chromeos-dev', 'chromeos-test'] |
| 28 | def _GetCrosTargetRank(target): |
| 29 | """Hard-coded ranking of known/expected chromeos root targets for sorting. |
| 30 | |
| 31 | The lower the ranking, the earlier in the target list it falls by |
| 32 | convention. In other words, in the typical target combination |
| 33 | "chromeos chromeos-dev", "chromeos" has a lower ranking than "chromeos-dev". |
| 34 | |
| 35 | All valid rankings are greater than zero. |
| 36 | |
| 37 | Return valid ranking for target or a false value if target is unrecognized.""" |
| 38 | for ix, targ in enumerate(CHROMEOS_TARGET_ORDER): |
| 39 | if target == targ: |
| 40 | return ix + 1 # Avoid a 0 (non-true) result |
| 41 | return None |
| 42 | |
def ProcessTargets(targets, reverse_cros=False):
  """Process a list of |targets| to smaller, sorted list.

  For example:
  chromeos chromeos-dev -> chromeos-dev
  chromeos chromeos-dev world -> chromeos-dev world
  world hard-host-depends -> hard-host-depends world

  The one chromeos target always comes back first, with targets
  otherwise sorted alphabetically.  The chromeos target that is
  kept will be the one with the highest 'ranking', as decided
  by _GetCrosTargetRank.  To reverse the ranking sense, specify
  |reverse_cros| as True.

  These rules are specific to how we want the information to appear
  in the final spreadsheet.

  Args:
    targets: Iterable of target names; may be empty or None.
    reverse_cros: If True, keep the lowest-ranked chromeos target instead
      of the highest-ranked one.

  Returns:
    A new list with at most one chromeos target first, followed by each
    distinct non-chromeos target in sorted order.  An empty list is returned
    when |targets| is empty or None.
  """
  # Always hand back a list, so callers can join or iterate the result
  # without special-casing the "no targets" situation.
  if not targets:
    return []

  cros_targets = []
  other_targets = set()
  for target in targets:
    if _GetCrosTargetRank(target):
      cros_targets.append(target)
    else:
      # Non-cros targets are not condensed by rank, but a target listed more
      # than once (e.g. coming from two merged tables) is only kept once.
      other_targets.add(target)

  final_targets = []
  if cros_targets:
    # Only a single cros target survives: the highest ranked one, or the
    # lowest ranked one when the ranking sense is reversed.
    pick = min if reverse_cros else max
    final_targets.append(pick(cros_targets, key=_GetCrosTargetRank))
  final_targets.extend(sorted(other_targets))

  return final_targets
| 77 | |
def LoadTable(filepath):
  """Load the csv file at |filepath| into a table.Table object.

  The table is named after the file: the basename of |filepath| with a
  trailing '.csv', if present, removed.
  """
  suffix = '.csv'
  name = os.path.basename(filepath)
  if name.endswith(suffix):
    name = name[:-len(suffix)]

  return table.Table.LoadFromCSV(filepath, name=name)
Matt Tennant | f34162f | 2011-06-08 17:24:09 -0700 | [diff] [blame] | 84 | |
def MergeTables(tables):
  """Merge all |tables| into one merged table.  Return table.

  The first entry of |tables| is the merge destination: each remaining table
  is merged into it through its MergeTable method, with rows matched on
  ID_COLS and new columns allowed.  The result is then sorted on ID_COLS
  (package name, then slot).

  Args:
    tables: Non-empty sequence of table objects.  tables[0] is the object
      that MergeTable and Sort are called on.

  Returns:
    tables[0], after all other tables were merged into it and it was sorted.
  """
  # Column-name patterns used by DefaultMerger.  They do not depend on the
  # values being merged, so compute them once rather than on every call.
  upgrade_cols = utable.UpgradeTable
  superset_prefixes = (
      upgrade_cols.COL_DEPENDS_ON.replace('ARCH', ''),
      upgrade_cols.COL_USED_BY.replace('ARCH', ''),
  )
  upgraded_re = re.compile(upgrade_cols.COL_UPGRADED.replace('ARCH', r'\S+'))
  empty_cell = table.Table.EMPTY_CELL

  def TargetMerger(_col, val, other_val):
    """Function to merge two values in Root Target column from two tables."""
    targets = []
    if val:
      targets.extend(val.split())
    if other_val:
      targets.extend(other_val.split())

    # Tolerate a false result from ProcessTargets (nothing to merge) by
    # joining an empty list instead.
    processed_targets = ProcessTargets(targets, reverse_cros=True) or []
    return ' '.join(processed_targets)

  def MergeToSuperset(_col, val, other_val):
    """Merge |col| values as superset of tokens in |val| and |other_val|."""
    all_tokens = set(val.split()).union(other_val.split())
    return ' '.join(sorted(all_tokens))

  # This is only needed because the automake-wrapper package is coming from
  # different overlays for different boards right now!
  def MergeWithAND(_col, val, other_val):
    """For merging columns that might have differences but should not!

    Both values are kept, joined by ' AND ', so that the disagreement stays
    visible.  A missing value on either side is spelled out as "".
    """
    left = val if val else '""'
    right = other_val if other_val else '""'
    return left + ' AND ' + right

  def DefaultMerger(col, val, other_val):
    """Merge |val| and |other_val| in column |col| for some row."""
    # This function is registered as the default merge function,
    # so verify that the column is a supported one.
    if col.startswith(superset_prefixes):
      # Merge dependencies and users by taking the superset.
      return MergeToSuperset(col, val, other_val)

    if upgraded_re.search(col):
      return MergeWithAND(col, val, other_val)

    # For any column, if one value is missing just accept the other value.
    # For example, when one table has an entry for 'arm version' but
    # the other table does not.
    if val == empty_cell and other_val != empty_cell:
      return other_val
    if other_val == empty_cell and val != empty_cell:
      return val

    # Raise a generic ValueError, which MergeTable function will clarify.
    # The effect should be the same as having no merge_rule for this column.
    raise ValueError

  # Prepare merge_rules with the defined functions.
  merge_rules = {COL_TARGET: TargetMerger,
                 COL_OVERLAY: MergeWithAND,
                 '__DEFAULT__': DefaultMerger,
                 }

  # Merge each table one by one.
  csv_table = tables[0]
  if len(tables) > 1:
    oper.Notice('Merging tables into one.')
    for tmp_table in tables[1:]:
      oper.Notice('Merging "%s" and "%s".' %
                  (csv_table.GetName(), tmp_table.GetName()))
      csv_table.MergeTable(tmp_table, ID_COLS,
                           merge_rules=merge_rules, allow_new_columns=True)

  # Sort the table by package name, then slot.
  def IdSort(row):
    return tuple(row[col] for col in ID_COLS)
  csv_table.Sort(IdSort)

  return csv_table
| 167 | |
def LoadAndMergeTables(args):
  """Load every csv file named in |args| and merge them into one table.

  Each file is announced and loaded in the order given; the loaded tables
  are then handed to MergeTables, whose result is returned.
  """
  loaded_tables = []
  for csv_path in args:
    oper.Notice('Loading csv table from "%s".' % csv_path)
    loaded_tables.append(LoadTable(csv_path))

  merged_table = MergeTables(loaded_tables)
  return merged_table
| 176 | |
# Used by upload_package_status.
def FinalizeTable(csv_table):
  """Prepare |csv_table| for upload to the online spreadsheet.

  Three columns are appended to the table and filled in row by row:
    'ChromeOS Root Target': the row's root targets that _GetCrosTargetRank
      recognizes.
    'Host Root Target': the row's remaining root targets.
    'Comparing arm vs x86 Versions': 'same' or 'different', set only when
      the row has both an arm and an x86 current version.

  In addition, whenever several rows share one package name, each of those
  rows gets a ':<slot>' suffix on its package name.

  Nothing is returned; the changes are made through |csv_table| and its rows.
  """
  oper.Notice('Processing final table to prepare it for upload.')

  ver_col = utable.UpgradeTable.COL_CURRENT_VER
  arm_ver_col = utable.UpgradeTable.GetColumnName(ver_col, 'arm')
  x86_ver_col = utable.UpgradeTable.GetColumnName(ver_col, 'x86')

  # Insert new columns
  cros_target_col = 'ChromeOS Root Target'
  host_target_col = 'Host Root Target'
  arch_cmp_col = 'Comparing arm vs x86 Versions'
  for new_col in (cros_target_col, host_target_col, arch_cmp_col):
    csv_table.AppendColumn(new_col)

  # Row by row processing
  for row in csv_table:
    # A package name shared by several rows is ambiguous on its own, so
    # qualify the name in each of those rows with its slot.
    same_name_rows = csv_table.GetRowsByValue({COL_PACKAGE: row[COL_PACKAGE]})
    if len(same_name_rows) > 1:
      for dup_row in same_name_rows:
        dup_row[COL_PACKAGE] += ':' + dup_row[COL_SLOT]

    # Split target column into cros_target and host_target columns
    target_str = row.get(COL_TARGET, None)
    if target_str:
      targets = target_str.split()
      row[cros_target_col] = ' '.join(
          t for t in targets if _GetCrosTargetRank(t))
      row[host_target_col] = ' '.join(
          t for t in targets if not _GetCrosTargetRank(t))

    # Compare x86 vs. arm version, add result to the comparison column.
    x86_ver = row.get(x86_ver_col)
    arm_ver = row.get(arm_ver_col)
    if x86_ver and arm_ver:
      row[arch_cmp_col] = 'same' if x86_ver == arm_ver else 'different'
Matt Tennant | f34162f | 2011-06-08 17:24:09 -0700 | [diff] [blame] | 227 | |
def WriteTable(csv_table, outpath):
  """Write |csv_table| out to |outpath| as csv.

  Args:
    csv_table: Table object; its WriteCSV method is given the open file.
    outpath: Path of the file to create or overwrite.

  Raises:
    IOError: If the file cannot be opened or written.  The failure is
      reported through oper.Error before being re-raised.
  """
  try:
    # The with statement guarantees the file is closed (and so flushed)
    # whether or not the write succeeds.
    with open(outpath, 'w') as fh:
      csv_table.WriteCSV(fh)
  except IOError as ex:
    oper.Error('Unable to open %s for write: %s' % (outpath, ex))
    raise
  else:
    # Only announce success once the file has been closed.
    oper.Notice('Wrote merged table to "%s"' % outpath)
| 237 | |
def main(argv):
  """Merge the csv files named in |argv| and write the result to --out.

  Args:
    argv: Command line arguments, without the program name: the --out
      option plus one or more input csv file paths.
  """
  parser = optparse.OptionParser(
      usage='Usage: %prog --out=merged_csv_file input_csv_files...')
  parser.add_option('--out', dest='outpath', type='string',
                    action='store', default=None,
                    help='File to write merged results to')

  options, args = parser.parse_args(argv)

  # Check required options: an output path and at least one input file.
  # Each failed check prints the help text and then reports through oper.Die.
  requirements = (
      (options.outpath, 'The --out option is required.'),
      (args, 'At least one input_csv_file is required.'),
  )
  for satisfied, complaint in requirements:
    if not satisfied:
      parser.print_help()
      oper.Die(complaint)

  merged_table = LoadAndMergeTables(args)

  WriteTable(merged_table, options.outpath)