blob: fd3aee7687e4e714b206517871aa88b4f5631885 [file] [log] [blame]
Nigel Tao1aa50162020-02-05 17:17:40 +11001// Copyright 2020 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// +build ignore
16
17// TODO: consider renaming this from script/preprocess-wuffs.go to
18// cmd/wuffspreprocess, making it a "go install"able command line tool.
19
20package main
21
22// preprocess-wuffs.go generates target Wuffs files based on source Wuffs
23// files. It is conceptually similar to, but weaker than, the C language's
24// preprocessor's #ifdef mechanism. It is a stand-alone tool, not built into
25// the Wuffs compiler. It runs relatively infrequently (not at every compile)
26// and preprocessed output is checked into the repository.
27//
28// Preprocessing is separate from compilation, unlike C, for multiple reasons:
29//
30// - It simplifies the Wuffs language per se, which makes it easier to write
31// other tools for Wuffs programs, such as an independent implementation of
32// the type checker or a tool that converts from Wuffs programs to the input
33// format of a formal verifier.
34//
35// - Having an explicit file containing the preprocessed output helps keep the
36// programmer aware of the cost (increased source size is correlated with
37// increased binary size) of generating code. Other programming languages
38// make it very easy (in terms of lines of code written and checked in),
// possibly too easy, to produce lots of object code, especially when
40// monomorphizing favors run-time performance over binary size.
41//
42// - Writing the generated code to disk can help debug that generated code.
43//
44// It is the programmer's responsibility to re-run the preprocessor to
45// re-generate the target files whenever the source file changes, similar to
46// the Go language's "go generate" (https://blog.golang.org/generate).
47// Naturally, this can be automated to some extent, e.g. through Makefiles or
48// git hooks (when combined with the -fail-on-change flag).
49//
50// --------
51//
52// Usage:
53//
54// go run preprocess-wuffs.go a.wuffs b*.wuffs dir3 dir4
55//
56// This scans all of the files or directories (recursively, albeit skipping
57// dot-files) passed for Wuffs-preprocessor directives (see below). If no files
58// or directories are passed, it scans ".", the current working directory.
59//
60// The optional -fail-on-change flag means to fail (with a non-zero exit code)
61// if the target files' contents would change.
62//
63// --------
64//
65// Directives:
66//
67// Preprocessing is line-based, and lines of interest all start with optional
68// whitespace and then "//#", slash slash hash, e.g. "//#USE etc".
69//
70// The first directive must be #USE, which mentions the name of this program
71// and then lists the files to generate.
72//
73// Other directives are grouped into blocks:
74// - One or more "#WHEN FOO filename1 filename2" lines, and then
75// - One "#DONE FOO" line.
76//
77// The "FOO" names are arbitrary but must be unique (in a file), preventing
78// nested blocks. A good text editor can also quickly cycle through the #WHEN
79// and #DONE directives for any given block by searching for that unique name.
80// By convention, the names look like "PREPROC123". The "123" suffix is for
81// uniqueness. The names' ordering, number-suffixed or not, does not matter.
82//
83// A #WHEN's filenames detail which target files are active: the subset of the
84// #USE directive's filenames that the subsequent lines (up until the next
85// #WHEN or #DONE) apply to. A #WHEN's filenames may be empty, in which case
86// the subsequent lines are part of the source file but none of the generated
87// target files.
88//
89// A #REPLACE directive adds a simple find/replace filter to the active
90// targets, applied to every subsequent generated line. A target may have
91// multiple filters, which are applied sequentially. Filters are conceptually
92// similar to a sed script, but the mechanism is trivial: for each input line,
93// the first exact sub-string match (if any) is replaced.
94//
95// Lines that aren't directives (that don't start with whitespace then "//#")
96// are simply copied (after per-target filtering) to either all active targets
97// (when within a block) or to all targets (otherwise).
98//
99// The ## directive (e.g. "//## apple banana") is, like all directives, a "//"
100// comment in the source file, but the "//##" is stripped and the remainder
101// ("apple banana") is treated as a non-directive line, copied and filtered per
102// the previous paragraph.
103//
104// For an example, look for "PREPROC" in the std/gif/decode_gif.wuffs file, and
105// try "diff std/gif/decode_{gif,config}.wuffs".
106
107import (
108 "bytes"
109 "flag"
110 "fmt"
111 "io/ioutil"
112 "os"
113 "path/filepath"
114 "runtime"
115 "sort"
116 "strings"
117
118 "github.com/google/wuffs/lang/render"
119
120 t "github.com/google/wuffs/lang/token"
121)
122
// focFlag is the -fail-on-change command line flag. When set, main1 returns an
// error instead of writing a target file whose contents would change.
var focFlag = flag.Bool(
	"fail-on-change",
	false,
	"fail (with a non-zero exit code) if the target files' contents would change",
)
127
128func main() {
129 if err := main1(); err != nil {
130 os.Stderr.WriteString(err.Error() + "\n")
131 os.Exit(1)
132 }
133}
134
135func main1() error {
136 flag.Parse()
137
138 if flag.NArg() == 0 {
139 if err := filepath.Walk(".", walk); err != nil {
140 return err
141 }
142 } else {
143 for i := 0; i < flag.NArg(); i++ {
144 arg := flag.Arg(i)
145 switch dir, err := os.Stat(arg); {
146 case err != nil:
147 return err
148 case dir.IsDir():
149 if err := filepath.Walk(arg, walk); err != nil {
150 return err
151 }
152 default:
153 if err := do(arg); err != nil {
154 return err
155 }
156 }
157 }
158 }
159
160 sortedFilenames := []string(nil)
161 for filename := range globalTargets {
162 sortedFilenames = append(sortedFilenames, filename)
163 }
164 sort.Strings(sortedFilenames)
165 for _, filename := range sortedFilenames {
166 contents := globalTargets[filename]
167 if x, err := ioutil.ReadFile(filename); (err == nil) && bytes.Equal(x, contents) {
168 fmt.Printf("gen unchanged: %s\n", filename)
169
170 continue
171 }
172 if *focFlag {
173 return fmt.Errorf("fail-on-change: %s\n", filename)
174 }
175 if err := writeFile(filename, contents); err != nil {
176 return fmt.Errorf("writing %s: %v", filename, err)
177 }
178 fmt.Printf("gen wrote: %s\n", filename)
179 }
180
181 return nil
182}
183
// isWuffsFile reports whether info describes a non-directory whose name ends
// in ".wuffs" and does not start with a dot.
func isWuffsFile(info os.FileInfo) bool {
	if info.IsDir() {
		return false
	}
	base := info.Name()
	if strings.HasPrefix(base, ".") {
		return false
	}
	return strings.HasSuffix(base, ".wuffs")
}
188
189func walk(filename string, info os.FileInfo, err error) error {
190 if (err == nil) && isWuffsFile(info) {
191 err = do(filename)
192 }
193 // Don't complain if a file was deleted in the meantime (i.e. the directory
194 // changed concurrently while running this program).
195 if (err != nil) && !os.IsNotExist(err) {
196 return err
197 }
198 return nil
199}
200
var (
	// The directive prefixes recognized by do. Each one includes its
	// trailing space.
	directiveDone    = []byte("//#DONE ")
	directiveHash    = []byte("//## ")
	directiveReplace = []byte("//#REPLACE ")
	directiveUse     = []byte("//#USE \"go run preprocess-wuffs.go\" TO MAKE ")
	directiveWhen    = []byte("//#WHEN ")

	// _with_ separates the two quoted strings of a #REPLACE directive and
	// space separates the arguments of the #USE and #WHEN directives.
	_with_ = []byte{' ', 'W', 'I', 'T', 'H', ' '}
	space  = []byte{' '}

	// globalTargets map from filenames to contents.
	globalTargets = make(map[string][]byte)
)
214
// target is one generated file under construction: the bytes accumulated so
// far plus the find/replace filters to apply to further lines.
type target struct {
	buffer  *bytes.Buffer
	filters []filter
}

// write appends s to the target's buffer after running it through each filter
// in order. A filter replaces only the first occurrence (if any) of its find
// bytes. The caller's slice is never modified.
func (tgt *target) write(s []byte) {
	out := s
	for i := range tgt.filters {
		f := &tgt.filters[i]
		// A count of 1 limits the substitution to the first match.
		out = bytes.Replace(out, f.find, f.replace, 1)
	}
	tgt.buffer.Write(out)
}

// filter is a single exact sub-string substitution.
type filter struct {
	find    []byte
	replace []byte
}
239
240func do(filename string) error {
241 src, err := ioutil.ReadFile(filename)
242 if err != nil {
243 return err
244 }
245 if !bytes.Contains(src, directiveUse) {
246 return nil
247 }
248 localTargets := map[string]*target(nil)
249 activeTargets := []*target(nil)
250 usedBlockNames := map[string]bool{}
251 blockName := []byte(nil) // Typically something like "PREPROC123".
252 prefix := []byte(nil) // Source file contents up to the "//#USE" directive.
253
254 for remaining := src; len(remaining) > 0; {
255 line := remaining
256 if i := bytes.IndexByte(remaining, '\n'); i >= 0 {
257 line, remaining = remaining[:i+1], remaining[i+1:]
258 } else {
259 remaining = nil
260 }
261
262 ppLine := parsePreprocessorLine(line)
263 if ppLine == nil {
264 if localTargets == nil {
265 prefix = append(prefix, line...)
266 } else {
267 for _, t := range activeTargets {
268 t.write(line)
269 }
270 }
271 continue
272 }
273
274 if bytes.HasPrefix(ppLine, directiveUse) {
275 if localTargets != nil {
276 return fmt.Errorf("multiple #USE directives")
277 }
278 err := error(nil)
279 localTargets, err = parseUse(filename, ppLine[len(directiveUse):])
280 if err != nil {
281 return err
282 }
283
284 activeTargets = activeTargets[:0]
285 for _, t := range localTargets {
286 activeTargets = append(activeTargets, t)
287 t.write(prefix)
288 }
289 prefix = nil
290 continue
291 }
292
293 if localTargets == nil {
294 return fmt.Errorf("missing #USE directive")
295 }
296
297 switch {
298 case bytes.HasPrefix(ppLine, directiveDone):
299 arg := ppLine[len(directiveDone):]
300 if blockName == nil {
301 return fmt.Errorf("bad #DONE directive without #WHEN directive")
302 } else if !bytes.Equal(blockName, arg) {
303 return fmt.Errorf("bad directive name: %q", arg)
304 }
305 activeTargets = activeTargets[:0]
306 for _, t := range localTargets {
307 activeTargets = append(activeTargets, t)
308 }
309 blockName = nil
310
311 case bytes.HasPrefix(ppLine, directiveHash):
312 indent := []byte(nil)
313 if i := bytes.IndexByte(line, '/'); i >= 0 {
314 indent = line[:i]
315 }
316 if blockName == nil {
317 for _, t := range localTargets {
318 t.buffer.Write(indent)
319 t.write(ppLine[len(directiveHash):])
320 t.buffer.WriteByte('\n')
321 }
322 } else {
323 for _, t := range activeTargets {
324 t.buffer.Write(indent)
325 t.write(ppLine[len(directiveHash):])
326 t.buffer.WriteByte('\n')
327 }
328 }
329
330 case bytes.HasPrefix(ppLine, directiveReplace):
331 f := parseReplace(ppLine[len(directiveReplace):])
332 if (f.find == nil) || (f.replace == nil) {
333 return fmt.Errorf("bad #REPLACE directive: %q", ppLine)
334 }
335 for _, t := range activeTargets {
336 t.filters = append(t.filters, f)
337 }
338
339 case bytes.HasPrefix(ppLine, directiveWhen):
340 args := bytes.Split(ppLine[len(directiveWhen):], space)
341 if len(args) == 0 {
342 return fmt.Errorf("bad #WHEN directive: %q", ppLine)
343 }
344 if blockName == nil {
345 blockName = args[0]
346 if bn := string(blockName); usedBlockNames[bn] {
347 return fmt.Errorf("duplicate directive name: %q", bn)
348 } else {
349 usedBlockNames[bn] = true
350 }
351 } else if !bytes.Equal(blockName, args[0]) {
352 return fmt.Errorf("bad directive name: %q", args[0])
353 }
354
355 dir := filepath.Dir(filename)
356 activeTargets = activeTargets[:0]
357 for _, arg := range args[1:] {
358 t := localTargets[filepath.Join(dir, string(arg))]
359 if t == nil {
360 return fmt.Errorf("bad #WHEN filename: %q", arg)
361 }
362 activeTargets = append(activeTargets, t)
363 }
364
365 default:
366 return fmt.Errorf("bad directive: %q", ppLine)
367 }
368 }
369
370 if blockName != nil {
371 return fmt.Errorf("missing #DONE directive: %q", blockName)
372 }
373
374 for absFilename, t := range localTargets {
375 globalTargets[absFilename] = wuffsfmt(t.buffer.Bytes())
376 }
377 return nil
378}
379
380func wuffsfmt(src []byte) []byte {
381 tm := &t.Map{}
382 tokens, comments, err := t.Tokenize(tm, "placeholder.filename", src)
383 if err != nil {
384 return src
385 }
386 dst := &bytes.Buffer{}
387 if err := render.Render(dst, tm, tokens, comments); err != nil {
388 return src
389 }
390 return dst.Bytes()
391}
392
// chmodSupported is whether writeFile copies an existing file's permission
// bits onto its replacement. It is false when building for Windows.
const chmodSupported = runtime.GOOS != "windows"

// writeFile replaces the contents of filename with b. The bytes are first
// written to a temporary file in the same directory, which is then renamed
// over filename, so that a failed write does not leave filename half-written.
//
// If filename already exists (and chmodSupported is true), its permission
// bits are applied to the temporary file before the rename.
//
// On any failure after the temporary file is created (write, close or
// rename), the temporary file is removed and the first error is returned.
func writeFile(filename string, b []byte) error {
	f, err := ioutil.TempFile(filepath.Dir(filename), filepath.Base(filename))
	if err != nil {
		return err
	}
	tmpName := f.Name()

	if chmodSupported {
		if info, statErr := os.Stat(filename); statErr == nil {
			// Best effort: failing to copy the permission bits is not a
			// reason to abandon the write.
			_ = f.Chmod(info.Mode().Perm())
		}
	}

	// Always close the file, even if the write failed, but report the write
	// error in preference to the close error.
	_, err = f.Write(b)
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err == nil {
		err = os.Rename(tmpName, filename)
	}
	if err != nil {
		// Don't leave a stray temporary file next to the target.
		os.Remove(tmpName)
		return err
	}
	return nil
}
417
418func parsePreprocessorLine(line []byte) []byte {
419 // Look for "//#", slash slash hash.
420 line = stripLeadingWhitespace(line)
421 if (len(line) >= 3) && (line[0] == '/') && (line[1] == '/') && (line[2] == '#') {
422 return bytes.TrimSpace(line)
423 }
424 return nil
425}
426
427func parseReplace(ppLine []byte) filter {
428 s0, ppLine := parseString(ppLine)
429 if s0 == nil {
430 return filter{}
431 }
432 if !bytes.HasPrefix(ppLine, _with_) {
433 return filter{}
434 }
435 ppLine = ppLine[len(_with_):]
436 s1, ppLine := parseString(ppLine)
437 if (s1 == nil) || (len(ppLine) != 0) {
438 return filter{}
439 }
440 return filter{
441 find: s0,
442 replace: s1,
443 }
444}
445
446func parseString(line []byte) (s []byte, remaining []byte) {
447 line = stripLeadingWhitespace(line)
448 if (len(line) == 0) || (line[0] != '"') {
449 return nil, line
450 }
451 line = line[1:]
452 i := bytes.IndexByte(line, '"')
453 if i < 0 {
454 return nil, line
455 }
456 if bytes.IndexByte(line[:i], '\\') >= 0 {
457 return nil, line
458 }
459 return line[:i], line[i+1:]
460}
461
462func parseUse(srcFilename string, ppLine []byte) (map[string]*target, error) {
463 absSrcFilename := filepath.Clean(srcFilename)
464 localTargets := map[string]*target{}
465 dir := filepath.Dir(srcFilename)
466 for _, relFilename := range bytes.Split(ppLine, space) {
467 if len(relFilename) == 0 {
468 continue
469 }
470 if !validFilename(relFilename) {
471 return nil, fmt.Errorf("invalid filename: %q", string(relFilename))
472 }
473 absFilename := filepath.Join(dir, string(relFilename))
474 if _, ok := globalTargets[absFilename]; ok {
475 return nil, fmt.Errorf("duplicate filename: %q", absFilename)
476 }
477 if absFilename == absSrcFilename {
478 return nil, fmt.Errorf("self-referential filename: %q", absFilename)
479 }
480
481 buf := &bytes.Buffer{}
482 buf.WriteString(
483 "// This file was automatically generated by \"preprocess-wuffs.go\".\n\n")
484 buf.WriteString("// --------\n\n")
485 localTargets[absFilename] = &target{buffer: buf}
486 }
487 return localTargets, nil
488}
489
// stripLeadingWhitespace returns s without its leading run of bytes that are
// less than or equal to ' ' (the space and the ASCII control characters).
func stripLeadingWhitespace(s []byte) []byte {
	n := 0
	for (n < len(s)) && (s[n] <= ' ') {
		n++
	}
	return s[n:]
}
496
// validFilename reports whether s is acceptable as a target filename: it must
// be non-empty, must not start with a dot and must not contain a slash, a
// backslash, a colon or any byte less than or equal to ' '.
func validFilename(s []byte) bool {
	if len(s) == 0 {
		return false
	}
	if s[0] == '.' {
		return false
	}
	for i := 0; i < len(s); i++ {
		switch b := s[i]; {
		case b <= ' ', b == '/', b == '\\', b == ':':
			return false
		}
	}
	return true
}