blob: 8afd41131e9385c1f7151866669c7615f75077b7 [file] [log] [blame]
Nigel Tao1aa50162020-02-05 17:17:40 +11001// Copyright 2020 The Wuffs Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
Nigel Tao788479d2021-08-22 10:52:51 +100015//go:build ignore
Nigel Tao1aa50162020-02-05 17:17:40 +110016// +build ignore
17
Nigel Taof473ffa2020-12-18 22:10:16 +110018// Deprecated: unused as of commit 695a6815 "Remove gif.config_decoder".
19
Nigel Tao1aa50162020-02-05 17:17:40 +110020// TODO: consider renaming this from script/preprocess-wuffs.go to
21// cmd/wuffspreprocess, making it a "go install"able command line tool.
22
23package main
24
25// preprocess-wuffs.go generates target Wuffs files based on source Wuffs
26// files. It is conceptually similar to, but weaker than, the C language's
27// preprocessor's #ifdef mechanism. It is a stand-alone tool, not built into
28// the Wuffs compiler. It runs relatively infrequently (not at every compile)
29// and preprocessed output is checked into the repository.
30//
31// Preprocessing is separate from compilation, unlike C, for multiple reasons:
32//
33// - It simplifies the Wuffs language per se, which makes it easier to write
34// other tools for Wuffs programs, such as an independent implementation of
35// the type checker or a tool that converts from Wuffs programs to the input
36// format of a formal verifier.
37//
38// - Having an explicit file containing the preprocessed output helps keep the
39// programmer aware of the cost (increased source size is correlated with
40// increased binary size) of generating code. Other programming languages
41// make it very easy (in terms of lines of code written and checked in),
42// possibly too easy, to produce lots of object code, especially when
43// monomorphizing favors run-time performance over binary size.
44//
45// - Writing the generated code to disk can help debug that generated code.
46//
47// It is the programmer's responsibility to re-run the preprocessor to
48// re-generate the target files whenever the source file changes, similar to
49// the Go language's "go generate" (https://blog.golang.org/generate).
50// Naturally, this can be automated to some extent, e.g. through Makefiles or
51// git hooks (when combined with the -fail-on-change flag).
52//
53// --------
54//
55// Usage:
56//
57// go run preprocess-wuffs.go a.wuffs b*.wuffs dir3 dir4
58//
59// This scans all of the files or directories (recursively, albeit skipping
60// dot-files) passed for Wuffs-preprocessor directives (see below). If no files
61// or directories are passed, it scans ".", the current working directory.
62//
63// The optional -fail-on-change flag means to fail (with a non-zero exit code)
64// if the target files' contents would change.
65//
66// --------
67//
68// Directives:
69//
70// Preprocessing is line-based, and lines of interest all start with optional
71// whitespace and then "//#", slash slash hash, e.g. "//#USE etc".
72//
73// The first directive must be #USE, which mentions the name of this program
74// and then lists the files to generate.
75//
76// Other directives are grouped into blocks:
77// - One or more "#WHEN FOO filename1 filename2" lines, and then
78// - One "#DONE FOO" line.
79//
80// The "FOO" names are arbitrary but must be unique (in a file), preventing
81// nested blocks. A good text editor can also quickly cycle through the #WHEN
82// and #DONE directives for any given block by searching for that unique name.
83// By convention, the names look like "PREPROC123". The "123" suffix is for
84// uniqueness. The names' ordering, number-suffixed or not, does not matter.
85//
86// A #WHEN's filenames detail which target files are active: the subset of the
87// #USE directive's filenames that the subsequent lines (up until the next
88// #WHEN or #DONE) apply to. A #WHEN's filenames may be empty, in which case
89// the subsequent lines are part of the source file but none of the generated
90// target files.
91//
92// A #REPLACE directive adds a simple find/replace filter to the active
93// targets, applied to every subsequent generated line. A target may have
94// multiple filters, which are applied sequentially. Filters are conceptually
95// similar to a sed script, but the mechanism is trivial: for each input line,
96// the first exact sub-string match (if any) is replaced.
97//
98// Lines that aren't directives (that don't start with whitespace then "//#")
99// are simply copied (after per-target filtering) to either all active targets
100// (when within a block) or to all targets (otherwise).
101//
102// The ## directive (e.g. "//## apple banana") is, like all directives, a "//"
103// comment in the source file, but the "//##" is stripped and the remainder
104// ("apple banana") is treated as a non-directive line, copied and filtered per
105// the previous paragraph.
106//
107// For an example, look for "PREPROC" in the std/gif/decode_gif.wuffs file, and
108// try "diff std/gif/decode_{gif,config}.wuffs".
109
110import (
111 "bytes"
112 "flag"
113 "fmt"
Nigel Tao1aa50162020-02-05 17:17:40 +1100114 "os"
115 "path/filepath"
116 "runtime"
117 "sort"
118 "strings"
119
120 "github.com/google/wuffs/lang/render"
121
122 t "github.com/google/wuffs/lang/token"
123)
124
// focFlag is the -fail-on-change command line flag. When set, main1 returns an
// error instead of writing a target file whose contents would change.
var focFlag = flag.Bool(
	"fail-on-change",
	false,
	"fail (with a non-zero exit code) if the target files' contents would change",
)
129
130func main() {
131 if err := main1(); err != nil {
132 os.Stderr.WriteString(err.Error() + "\n")
133 os.Exit(1)
134 }
135}
136
137func main1() error {
138 flag.Parse()
139
140 if flag.NArg() == 0 {
141 if err := filepath.Walk(".", walk); err != nil {
142 return err
143 }
144 } else {
145 for i := 0; i < flag.NArg(); i++ {
146 arg := flag.Arg(i)
147 switch dir, err := os.Stat(arg); {
148 case err != nil:
149 return err
150 case dir.IsDir():
151 if err := filepath.Walk(arg, walk); err != nil {
152 return err
153 }
154 default:
155 if err := do(arg); err != nil {
156 return err
157 }
158 }
159 }
160 }
161
162 sortedFilenames := []string(nil)
163 for filename := range globalTargets {
164 sortedFilenames = append(sortedFilenames, filename)
165 }
166 sort.Strings(sortedFilenames)
167 for _, filename := range sortedFilenames {
168 contents := globalTargets[filename]
Nigel Tao226c4762021-08-22 11:05:43 +1000169 if x, err := os.ReadFile(filename); (err == nil) && bytes.Equal(x, contents) {
Nigel Tao1aa50162020-02-05 17:17:40 +1100170 fmt.Printf("gen unchanged: %s\n", filename)
171
172 continue
173 }
174 if *focFlag {
175 return fmt.Errorf("fail-on-change: %s\n", filename)
176 }
177 if err := writeFile(filename, contents); err != nil {
178 return fmt.Errorf("writing %s: %v", filename, err)
179 }
180 fmt.Printf("gen wrote: %s\n", filename)
181 }
182
183 return nil
184}
185
// isWuffsFile reports whether info describes a non-directory whose name ends
// in ".wuffs" and does not start with a dot.
func isWuffsFile(info os.FileInfo) bool {
	if info.IsDir() {
		return false
	}
	name := info.Name()
	if strings.HasPrefix(name, ".") {
		return false
	}
	return strings.HasSuffix(name, ".wuffs")
}
190
191func walk(filename string, info os.FileInfo, err error) error {
192 if (err == nil) && isWuffsFile(info) {
193 err = do(filename)
194 }
195 // Don't complain if a file was deleted in the meantime (i.e. the directory
196 // changed concurrently while running this program).
197 if (err != nil) && !os.IsNotExist(err) {
198 return err
199 }
200 return nil
201}
202
// Directive prefixes, as they appear at the start of a (whitespace-trimmed)
// preprocessor line. Each one ends with a space that separates the directive
// keyword from its arguments.
var (
	directiveDone    = []byte("//#DONE ")
	directiveHash    = []byte("//## ")
	directiveReplace = []byte("//#REPLACE ")
	directiveUse     = []byte("//#USE \"go run preprocess-wuffs.go\" TO MAKE ")
	directiveWhen    = []byte("//#WHEN ")
)

// Separators used when parsing directive arguments.
var (
	// _with_ sits between the two quoted strings of a #REPLACE directive.
	_with_ = []byte(` WITH `)
	// space separates the arguments of the #USE and #WHEN directives.
	space = []byte(` `)
)

// globalTargets map from filenames to contents. It accumulates the generated
// files across every source file processed by do.
var globalTargets = make(map[string][]byte)
216
// filter is a single find/replace rule, as introduced by a #REPLACE directive.
type filter struct {
	find    []byte
	replace []byte
}

// target is one generated file under construction: the bytes accumulated so
// far plus the filters to apply to every subsequently written line.
type target struct {
	buffer  *bytes.Buffer
	filters []filter
}

// write appends s to the target's buffer after running it through each of the
// target's filters, in order. Each filter replaces only the first occurrence
// (if any) of its find bytes, and later filters see the output of earlier
// ones. The caller's slice is never modified.
func (tgt *target) write(s []byte) {
	out := s
	for i := range tgt.filters {
		f := &tgt.filters[i]
		out = bytes.Replace(out, f.find, f.replace, 1)
	}
	tgt.buffer.Write(out)
}
241
242func do(filename string) error {
Nigel Tao226c4762021-08-22 11:05:43 +1000243 src, err := os.ReadFile(filename)
Nigel Tao1aa50162020-02-05 17:17:40 +1100244 if err != nil {
245 return err
246 }
247 if !bytes.Contains(src, directiveUse) {
248 return nil
249 }
250 localTargets := map[string]*target(nil)
251 activeTargets := []*target(nil)
252 usedBlockNames := map[string]bool{}
253 blockName := []byte(nil) // Typically something like "PREPROC123".
254 prefix := []byte(nil) // Source file contents up to the "//#USE" directive.
255
256 for remaining := src; len(remaining) > 0; {
257 line := remaining
258 if i := bytes.IndexByte(remaining, '\n'); i >= 0 {
259 line, remaining = remaining[:i+1], remaining[i+1:]
260 } else {
261 remaining = nil
262 }
263
264 ppLine := parsePreprocessorLine(line)
265 if ppLine == nil {
266 if localTargets == nil {
267 prefix = append(prefix, line...)
268 } else {
269 for _, t := range activeTargets {
270 t.write(line)
271 }
272 }
273 continue
274 }
275
276 if bytes.HasPrefix(ppLine, directiveUse) {
277 if localTargets != nil {
278 return fmt.Errorf("multiple #USE directives")
279 }
280 err := error(nil)
281 localTargets, err = parseUse(filename, ppLine[len(directiveUse):])
282 if err != nil {
283 return err
284 }
285
286 activeTargets = activeTargets[:0]
287 for _, t := range localTargets {
288 activeTargets = append(activeTargets, t)
289 t.write(prefix)
290 }
291 prefix = nil
292 continue
293 }
294
295 if localTargets == nil {
296 return fmt.Errorf("missing #USE directive")
297 }
298
299 switch {
300 case bytes.HasPrefix(ppLine, directiveDone):
301 arg := ppLine[len(directiveDone):]
302 if blockName == nil {
303 return fmt.Errorf("bad #DONE directive without #WHEN directive")
304 } else if !bytes.Equal(blockName, arg) {
305 return fmt.Errorf("bad directive name: %q", arg)
306 }
307 activeTargets = activeTargets[:0]
308 for _, t := range localTargets {
309 activeTargets = append(activeTargets, t)
310 }
311 blockName = nil
312
313 case bytes.HasPrefix(ppLine, directiveHash):
314 indent := []byte(nil)
315 if i := bytes.IndexByte(line, '/'); i >= 0 {
316 indent = line[:i]
317 }
318 if blockName == nil {
319 for _, t := range localTargets {
320 t.buffer.Write(indent)
321 t.write(ppLine[len(directiveHash):])
322 t.buffer.WriteByte('\n')
323 }
324 } else {
325 for _, t := range activeTargets {
326 t.buffer.Write(indent)
327 t.write(ppLine[len(directiveHash):])
328 t.buffer.WriteByte('\n')
329 }
330 }
331
332 case bytes.HasPrefix(ppLine, directiveReplace):
333 f := parseReplace(ppLine[len(directiveReplace):])
334 if (f.find == nil) || (f.replace == nil) {
335 return fmt.Errorf("bad #REPLACE directive: %q", ppLine)
336 }
337 for _, t := range activeTargets {
338 t.filters = append(t.filters, f)
339 }
340
341 case bytes.HasPrefix(ppLine, directiveWhen):
342 args := bytes.Split(ppLine[len(directiveWhen):], space)
343 if len(args) == 0 {
344 return fmt.Errorf("bad #WHEN directive: %q", ppLine)
345 }
346 if blockName == nil {
347 blockName = args[0]
348 if bn := string(blockName); usedBlockNames[bn] {
349 return fmt.Errorf("duplicate directive name: %q", bn)
350 } else {
351 usedBlockNames[bn] = true
352 }
353 } else if !bytes.Equal(blockName, args[0]) {
354 return fmt.Errorf("bad directive name: %q", args[0])
355 }
356
357 dir := filepath.Dir(filename)
358 activeTargets = activeTargets[:0]
359 for _, arg := range args[1:] {
360 t := localTargets[filepath.Join(dir, string(arg))]
361 if t == nil {
362 return fmt.Errorf("bad #WHEN filename: %q", arg)
363 }
364 activeTargets = append(activeTargets, t)
365 }
366
367 default:
368 return fmt.Errorf("bad directive: %q", ppLine)
369 }
370 }
371
372 if blockName != nil {
373 return fmt.Errorf("missing #DONE directive: %q", blockName)
374 }
375
376 for absFilename, t := range localTargets {
377 globalTargets[absFilename] = wuffsfmt(t.buffer.Bytes())
378 }
379 return nil
380}
381
382func wuffsfmt(src []byte) []byte {
383 tm := &t.Map{}
384 tokens, comments, err := t.Tokenize(tm, "placeholder.filename", src)
385 if err != nil {
386 return src
387 }
388 dst := &bytes.Buffer{}
389 if err := render.Render(dst, tm, tokens, comments); err != nil {
390 return src
391 }
392 return dst.Bytes()
393}
394
// chmodSupported is whether writeFile tries to copy the permission bits of the
// file being replaced. It is false on Windows.
const chmodSupported = runtime.GOOS != "windows"

// writeFile replaces the contents of filename with b.
//
// The bytes are first written to a temporary file created in the same
// directory as filename, which is then renamed over filename. If filename
// already exists (and chmodSupported is true), its permission bits are copied
// to the temporary file first.
//
// On any failure, including a failed rename, the temporary file is removed and
// the error is returned.
func writeFile(filename string, b []byte) error {
	f, err := os.CreateTemp(filepath.Dir(filename), filepath.Base(filename))
	if err != nil {
		return err
	}
	tmpName := f.Name()

	if chmodSupported {
		if info, err := os.Stat(filename); err == nil {
			// Best effort: failing to copy the permissions should not prevent
			// the contents from being written.
			_ = f.Chmod(info.Mode().Perm())
		}
	}

	// Always close the file, but report a write error in preference to a close
	// error.
	_, err = f.Write(b)
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err == nil {
		err = os.Rename(tmpName, filename)
	}
	if err != nil {
		// Don't leave a stray temporary file behind.
		os.Remove(tmpName)
		return err
	}
	return nil
}
419
420func parsePreprocessorLine(line []byte) []byte {
421 // Look for "//#", slash slash hash.
422 line = stripLeadingWhitespace(line)
423 if (len(line) >= 3) && (line[0] == '/') && (line[1] == '/') && (line[2] == '#') {
424 return bytes.TrimSpace(line)
425 }
426 return nil
427}
428
429func parseReplace(ppLine []byte) filter {
430 s0, ppLine := parseString(ppLine)
431 if s0 == nil {
432 return filter{}
433 }
434 if !bytes.HasPrefix(ppLine, _with_) {
435 return filter{}
436 }
437 ppLine = ppLine[len(_with_):]
438 s1, ppLine := parseString(ppLine)
439 if (s1 == nil) || (len(ppLine) != 0) {
440 return filter{}
441 }
442 return filter{
443 find: s0,
444 replace: s1,
445 }
446}
447
448func parseString(line []byte) (s []byte, remaining []byte) {
449 line = stripLeadingWhitespace(line)
450 if (len(line) == 0) || (line[0] != '"') {
451 return nil, line
452 }
453 line = line[1:]
454 i := bytes.IndexByte(line, '"')
455 if i < 0 {
456 return nil, line
457 }
458 if bytes.IndexByte(line[:i], '\\') >= 0 {
459 return nil, line
460 }
461 return line[:i], line[i+1:]
462}
463
464func parseUse(srcFilename string, ppLine []byte) (map[string]*target, error) {
465 absSrcFilename := filepath.Clean(srcFilename)
466 localTargets := map[string]*target{}
467 dir := filepath.Dir(srcFilename)
468 for _, relFilename := range bytes.Split(ppLine, space) {
469 if len(relFilename) == 0 {
470 continue
471 }
472 if !validFilename(relFilename) {
473 return nil, fmt.Errorf("invalid filename: %q", string(relFilename))
474 }
475 absFilename := filepath.Join(dir, string(relFilename))
476 if _, ok := globalTargets[absFilename]; ok {
477 return nil, fmt.Errorf("duplicate filename: %q", absFilename)
478 }
479 if absFilename == absSrcFilename {
480 return nil, fmt.Errorf("self-referential filename: %q", absFilename)
481 }
482
483 buf := &bytes.Buffer{}
484 buf.WriteString(
485 "// This file was automatically generated by \"preprocess-wuffs.go\".\n\n")
486 buf.WriteString("// --------\n\n")
487 localTargets[absFilename] = &target{buffer: buf}
488 }
489 return localTargets, nil
490}
491
// stripLeadingWhitespace returns s without its leading run of bytes that are
// less than or equal to ' ' (the space character and the ASCII control
// characters). The result shares s's underlying array.
func stripLeadingWhitespace(s []byte) []byte {
	n := 0
	for (n < len(s)) && (s[n] <= ' ') {
		n++
	}
	return s[n:]
}
498
// validFilename reports whether s is acceptable as a target filename: it must
// be non-empty, must not start with a dot and must not contain a slash, a
// backslash, a colon, a space or an ASCII control character.
func validFilename(s []byte) bool {
	if (len(s) == 0) || (s[0] == '.') {
		return false
	}
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c <= ' ', c == '/', c == '\\', c == ':':
			return false
		}
	}
	return true
}