"""Coverage data for Coverage."""
| 2 | |
| 3 | import os |
| 4 | |
| 5 | from coverage.backward import iitems, pickle, sorted # pylint: disable=W0622 |
| 6 | from coverage.files import PathAliases |
| 7 | from coverage.misc import file_be_gone |
| 8 | |
| 9 | |
class CoverageData(object):
    """Manages collected coverage data, including file storage.

    The data file format is a pickled dict, with these keys:

    * collector: a string identifying the collecting software

    * lines: a dict mapping filenames to sorted lists of line numbers
      executed:
        { 'file1': [17,23,45], 'file2': [1,2,3], ... }

    * arcs: a dict mapping filenames to sorted lists of line number pairs:
        { 'file1': [(17,23), (17,25), (25,26)], ... }

    """

    def __init__(self, basename=None, collector=None, debug=None):
        """Create a CoverageData.

        `basename` is the name of the file to use for storing data.

        `collector` is a string describing the coverage measurement software.

        `debug` is a `DebugControl` object for writing debug messages.

        """
        self.collector = collector or 'unknown'
        self.debug = debug

        self.use_file = True

        # The absolute path that will be used for data file storage, if we
        # ever do any file storage.
        self.filename = os.path.abspath(basename or ".coverage")

        # Executed lines: maps canonical Python source file names to dicts
        # whose keys are the executed line numbers (the values are always
        # None, the dict is used as a set):
        #
        #   { 'filename1.py': { 12: None, 47: None, ... }, ... }
        #
        self.lines = {}

        # Executed arcs: maps canonical Python source file names to dicts
        # keyed by (start, end) line-number pairs:
        #
        #   { 'filename1.py': { (12,14): None, (47,48): None, ... }, ... }
        #
        self.arcs = {}

    def usefile(self, use_file=True):
        """Set whether or not to use a disk file for data."""
        self.use_file = use_file

    def read(self):
        """Read coverage data from the coverage data file (if it exists)."""
        if not self.use_file:
            # No file storage: start from nothing.
            self.lines, self.arcs = {}, {}
            return
        self.lines, self.arcs = self._read_file(self.filename)

    def write(self, suffix=None):
        """Write the collected coverage data to a file.

        `suffix` is a suffix to append to the base file name. This can be used
        for multiple or parallel execution, so that many coverage data files
        can exist simultaneously. A dot will be used to join the base name and
        the suffix.

        """
        if not self.use_file:
            return
        filename = self.filename
        if suffix:
            filename = filename + "." + suffix
        self.write_file(filename)

    def erase(self):
        """Erase the data, both in this object, and from its file storage."""
        if self.use_file and self.filename:
            file_be_gone(self.filename)
        self.lines = {}
        self.arcs = {}

    def line_data(self):
        """Return the map from filenames to lists of line numbers executed."""
        data = {}
        for fname, lmap in iitems(self.lines):
            data[fname] = sorted(lmap.keys())
        return data

    def arc_data(self):
        """Return the map from filenames to lists of line number pairs."""
        data = {}
        for fname, amap in iitems(self.arcs):
            data[fname] = sorted(amap.keys())
        return data

    def write_file(self, filename):
        """Write the coverage data to `filename`."""

        # Assemble the dict that will be pickled.
        data = {}
        data['lines'] = self.line_data()
        arcs = self.arc_data()
        if arcs:
            # Only include an 'arcs' entry when there are arcs to record.
            data['arcs'] = arcs
        if self.collector:
            data['collector'] = self.collector

        if self.debug and self.debug.should('dataio'):
            self.debug.write("Writing data to %r" % (filename,))

        # Write the pickle to the file, closing it even on failure.
        fdata = open(filename, 'wb')
        try:
            pickle.dump(data, fdata, 2)
        finally:
            fdata.close()

    def read_file(self, filename):
        """Read the coverage data from `filename`."""
        self.lines, self.arcs = self._read_file(filename)

    def raw_data(self, filename):
        """Return the raw pickled data from `filename`."""
        if self.debug and self.debug.should('dataio'):
            self.debug.write("Reading data from %r" % (filename,))
        # Load the pickle, closing the file even on failure.
        fdata = open(filename, 'rb')
        try:
            data = pickle.load(fdata)
        finally:
            fdata.close()
        return data

    def _read_file(self, filename):
        """Return the stored coverage data from the given file.

        Returns two values, suitable for assigning to `self.lines` and
        `self.arcs`.

        """
        lines = {}
        arcs = {}
        try:
            data = self.raw_data(filename)
            if isinstance(data, dict):
                # Unpack the 'lines' item: each list of line numbers becomes
                # a dict-used-as-set with None values.
                unpacked = {}
                for fname, linenos in iitems(data.get('lines', {})):
                    unpacked[fname] = dict.fromkeys(linenos, None)
                lines = unpacked
                # Unpack the 'arcs' item the same way.
                unpacked = {}
                for fname, arcpairs in iitems(data.get('arcs', {})):
                    unpacked[fname] = dict.fromkeys(arcpairs, None)
                arcs = unpacked
        except Exception:
            # A missing or unreadable data file simply means no data.
            pass
        return lines, arcs

    def combine_parallel_data(self, aliases=None):
        """Combine a number of data files together.

        Treat `self.filename` as a file prefix, and combine the data from all
        of the data files starting with that prefix plus a dot.

        If `aliases` is provided, it's a `PathAliases` object that is used to
        re-map paths to match the local machine's.

        """
        aliases = aliases or PathAliases()
        data_dir, base = os.path.split(self.filename)
        prefix = base + '.'
        for name in os.listdir(data_dir or '.'):
            if not name.startswith(prefix):
                continue
            full_path = os.path.join(data_dir, name)
            new_lines, new_arcs = self._read_file(full_path)
            # Merge the read data into ours, re-mapping paths as we go.
            for fname, file_data in iitems(new_lines):
                fname = aliases.map(fname)
                self.lines.setdefault(fname, {}).update(file_data)
            for fname, file_data in iitems(new_arcs):
                fname = aliases.map(fname)
                self.arcs.setdefault(fname, {}).update(file_data)
            if name != base:
                # Remove the combined file, but never our own base file.
                os.remove(full_path)

    def add_line_data(self, line_data):
        """Add executed line data.

        `line_data` is { filename: { lineno: None, ... }, ...}

        """
        for fname, linenos in iitems(line_data):
            self.lines.setdefault(fname, {}).update(linenos)

    def add_arc_data(self, arc_data):
        """Add measured arc data.

        `arc_data` is { filename: { (l1,l2): None, ... }, ...}

        """
        for fname, arcpairs in iitems(arc_data):
            self.arcs.setdefault(fname, {}).update(arcpairs)

    def touch_file(self, filename):
        """Ensure that `filename` appears in the data, empty if needed."""
        self.lines.setdefault(filename, {})

    def measured_files(self):
        """A list of all files that had been measured."""
        return list(self.lines.keys())

    def executed_lines(self, filename):
        """A map containing all the line numbers executed in `filename`.

        If `filename` hasn't been collected at all (because it wasn't executed)
        then return an empty map.

        """
        lmap = self.lines.get(filename)
        if not lmap:
            lmap = {}
        return lmap

    def executed_arcs(self, filename):
        """A map containing all the arcs executed in `filename`."""
        amap = self.arcs.get(filename)
        if not amap:
            amap = {}
        return amap

    def add_to_hash(self, filename, hasher):
        """Contribute `filename`'s data to the Md5Hash `hasher`."""
        hasher.update(self.executed_lines(filename))
        hasher.update(self.executed_arcs(filename))

    def summary(self, fullpath=False):
        """Return a dict summarizing the coverage data.

        Keys are based on the filenames, and values are the number of executed
        lines. If `fullpath` is true, then the keys are the full pathnames of
        the files, otherwise they are the basenames of the files.

        """
        summ = {}
        for fname, lmap in iitems(self.lines):
            if fullpath:
                key = fname
            else:
                key = os.path.basename(fname)
            summ[key] = len(lmap)
        return summ

    def has_arcs(self):
        """Does this data have arcs?"""
        return bool(self.arcs)
| 268 | |
| 269 | |
if __name__ == '__main__':
    # Ad-hoc: show the raw data in a data file.
    import pprint
    import sys
    covdata = CoverageData()
    args = sys.argv[1:]
    if args:
        fname = args[0]
    else:
        # No argument: default to this machine's standard data file.
        fname = covdata.filename
    pprint.pprint(covdata.raw_data(fname))