blob: efeba33518a2fbfaba0aa855c53a00ebceec3f39 [file] [log] [blame]
// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "foomatic_shell/scanner.h"
6
Piotr Pawliczekcdd921f2020-05-06 17:35:58 -07007#include <string>
8#include <vector>
9
Qijiang Fan713061e2021-03-08 15:45:12 +090010#include <base/check.h>
Piotr Pawliczek7bb6e512020-07-13 10:59:36 -070011#include <base/logging.h>
12
Piotr Pawliczekcdd921f2020-05-06 17:35:58 -070013namespace foomatic_shell {
14
15// This class encapsulates an iterator representing the current position in the
16// input string.
17class Scanner::Input {
18 public:
19 // Constructor. |data| is a reference to the input string. The input string
20 // must remain constant and valid during the lifetime of the object. The
21 // current position is set to the first element in the |data|.
22 explicit Input(const std::string& data)
23 : data_(data), current_(data_.begin()) {}
24
25 Input(const Input&) = delete;
26 Input(Input&&) = delete;
27
28 // Returns the iterator to the current position. The iterator is from the
29 // input string given in the constructor and is always valid (but may be
30 // equal |data.end()|).
31 std::string::const_iterator GetCurrentPosition() const { return current_; }
32
33 // Returns the value of the current character. If the current position is set
34 // to |data.end()|, this method returns '\0'.
35 char GetCurrentCharacter() const {
36 if (current_ == data_.end())
37 return '\0';
38 return *current_;
39 }
40
41 // Returns true <=> a character at the current position equals |c|. If the
42 // current position is set to |data.end()|, it returns false.
43 bool CurrentCharIs(char c) const {
44 return (current_ != data_.end() && *current_ == c);
45 }
46
47 // Returns true <=> a character at the current position is ByteNative (see
48 // grammar.h for details). If the current position is set to |data.end()|,
49 // it returns false.
50 bool CurrentCharIsByteNative() const {
51 if (current_ == data_.end())
52 return false;
53 if (*current_ >= 'A' && *current_ <= 'Z')
54 return true;
55 if (*current_ >= 'a' && *current_ <= 'z')
56 return true;
57 if (*current_ >= '0' && *current_ <= '9')
58 return true;
59 return (std::string("./_+-@%").find(*current_) != std::string::npos);
60 }
61
62 // Returns true <=> a character at the current position equals to one of the
63 // elements of |chars|. If the current position is set to |data.end()|,
64 // it returns false.
65 bool CurrentCharIsOneOf(const std::string& chars) const {
66 if (current_ == data_.end())
67 return false;
68 return (chars.find(*current_) != std::string::npos);
69 }
70
71 // Returns true <=> the current position is set to |data.end()|.
72 bool CurrentCharIsEOF() const { return (current_ == data_.end()); }
73
74 // Move the current position to the next element. If the current position
75 // is set to |data.end()|, it does nothing.
76 void MoveToNext() {
77 if (current_ != data_.end())
78 ++current_;
79 }
80
81 private:
82 const std::string& data_;
83 std::string::const_iterator current_;
84};
85
86Scanner::Scanner(const std::string& data)
87 : data_(std::make_unique<Input>(data)) {}
88Scanner::~Scanner() {}
89
90// Parses the following (see grammar.h for details):
Piotr Pawliczek7bb6e512020-07-13 10:59:36 -070091// LiteralString = "'" , { ByteCommon | '"' | "`" | "\" } , "'" ;
Piotr Pawliczekcdd921f2020-05-06 17:35:58 -070092// The current position must be one the opening '. It moves cursor to the first
93// character after the closing '. The resultant token is added to |tokens|.
94// |tokens| must not be nullptr. Returns false in case of an error.
95bool Scanner::ParseLiteralString(std::vector<Token>* tokens) {
96 DCHECK(tokens != nullptr);
97 DCHECK(data_->CurrentCharIs('\''));
98
99 // Skip the opening '.
100 data_->MoveToNext();
101
102 // Create a new token.
103 tokens->resize(tokens->size() + 1);
104 Token* out = &(tokens->back());
105 out->type = Token::Type::kLiteralString;
106 out->begin = data_->GetCurrentPosition();
107
108 // Move forward until we find EOF or the closing '.
109 while (!data_->CurrentCharIsEOF()) {
110 if (data_->CurrentCharIs('\'')) {
111 // The closing ' was found.
112 out->end = data_->GetCurrentPosition();
113 out->value.assign(out->begin, out->end);
114 // Skip the closing '.
115 data_->MoveToNext();
116 // Success!
117 return true;
118 }
119 data_->MoveToNext();
120 }
121
122 // There is no closing '.
123 out->end = data_->GetCurrentPosition();
124 message_ = "Unexpected EOF when parsing '...' (literal string)";
125 return false;
126}
127
128// Parses the following (see grammar.h for details):
Piotr Pawliczek7bb6e512020-07-13 10:59:36 -0700129// ExecutedString = "`" , { ByteCommon | "'" | '"' | ("\",ByteAny) } , "`" ;
Piotr Pawliczekcdd921f2020-05-06 17:35:58 -0700130// The current position must be one the opening `. It moves cursor to the first
131// character after the closing `. The resultant token is added to |tokens|.
132// |tokens| must not be nullptr. Returns false in case of an error.
133bool Scanner::ParseExecutedString(std::vector<Token>* tokens) {
134 DCHECK(tokens != nullptr);
135 DCHECK(data_->CurrentCharIs('`'));
136
137 // Skip the opening `.
138 data_->MoveToNext();
139
140 // Create a new token.
141 tokens->resize(tokens->size() + 1);
142 Token* out = &(tokens->back());
143 out->type = Token::Type::kExecutedString;
144 out->begin = data_->GetCurrentPosition();
145
146 // Move forward until we find EOF or the closing `.
147 while (!data_->CurrentCharIsEOF()) {
148 if (data_->CurrentCharIs('`')) {
149 // The closing ` was found.
150 out->end = data_->GetCurrentPosition();
151 // Skip the closing `.
152 data_->MoveToNext();
153 // Success!
154 return true;
155 }
156 // The escape character (\) works in ExecutedString for ByteAny.
157 if (data_->CurrentCharIs('\\')) {
158 data_->MoveToNext();
159 if (data_->CurrentCharIsEOF())
160 break;
161 }
162 // Save the current character and move to the next element.
163 out->value.push_back(data_->GetCurrentCharacter());
164 data_->MoveToNext();
165 }
166
167 // There is no closing `.
168 out->end = data_->GetCurrentPosition();
169 message_ = "Unexpected EOF when parsing `...` (executed string)";
170 return false;
171}
172
173// Parses the following (see grammar.h for details):
Piotr Pawliczek7bb6e512020-07-13 10:59:36 -0700174// InterpretedString = '"' , { ByteCommon | "'" | "\" | ("\",'"') | ("\","`")
175// | ("\","\") | ExecutedString } , '"' ;
Piotr Pawliczekcdd921f2020-05-06 17:35:58 -0700176// The current position must be one the opening ". It moves cursor to the first
177// character after the closing ". If the string contains one or more
178// ExecutedString, it is split into a sequence of consecutive tokens of types
179// InterpretedString and ExecutedString. The resultant tokens are added to
180// |tokens|. |tokens| must not be nullptr. Returns false in case of an error.
181bool Scanner::ParseInterpretedString(std::vector<Token>* tokens) {
182 DCHECK(tokens != nullptr);
183 DCHECK(data_->CurrentCharIs('"'));
184
185 // Skip the opening ".
186 data_->MoveToNext();
187
188 // Create a sequence of alternating InterpretedString and ExecutedString
189 // tokens.
190 while (true) {
191 // Create a new InterpretedString token.
192 tokens->resize(tokens->size() + 1);
193 Token* out = &(tokens->back());
194 out->type = Token::Type::kInterpretedString;
195 out->begin = data_->GetCurrentPosition();
196
197 // Move forward until we find EOF, the closing " or the opening `.
198 while (true) {
199 if (data_->CurrentCharIs('"')) {
200 // The closing " was found.
201 out->end = data_->GetCurrentPosition();
202 data_->MoveToNext();
203 return true;
204 }
205 if (data_->CurrentCharIs('`')) {
206 // The opening ` was found. We finish the current token and
207 // add a new ExecutedString token.
208 out->end = data_->GetCurrentPosition();
209 if (!ParseExecutedString(tokens))
210 return false;
211 // We break the internal loop to create a new InterpretedString
212 // token.
213 break;
214 }
215 if (data_->CurrentCharIs('\\')) {
216 // It may be an escape character for " or `.
217 data_->MoveToNext();
218 if (data_->CurrentCharIsOneOf("\"`\\")) {
219 // The next character is " or `. Just skip \ and go ahead.
220 } else {
221 // It was not an escape character. We have to add a skipped \.
222 out->value.push_back('\\');
223 }
224 }
225 if (data_->CurrentCharIsEOF()) {
226 // There is no closing ".
227 out->end = data_->GetCurrentPosition();
228 message_ = "Unexpected EOF when parsing \"...\" (interpreted string)";
229 return false;
230 }
231 // Save the current character and move to the next element.
232 out->value.push_back(data_->GetCurrentCharacter());
233 data_->MoveToNext();
234 }
235 }
236}
237
238// Parses the following (see grammar.h for details):
Piotr Pawliczek7bb6e512020-07-13 10:59:36 -0700239// NativeString = { ByteNative | ("\",ByteAny) }- ;
Piotr Pawliczekcdd921f2020-05-06 17:35:58 -0700240// The current position must be one the first character of NativeString. It
241// moves cursor to the first character after the end of the string. The
242// resultant token is added to |tokens|. |tokens| must not be nullptr. Returns
243// false in case of an error.
244bool Scanner::ParseNativeString(std::vector<Token>* tokens) {
245 DCHECK(tokens != nullptr);
246 DCHECK(data_->CurrentCharIsByteNative() || data_->CurrentCharIs('\\'));
247
248 // Create a new token.
249 tokens->resize(tokens->size() + 1);
250 Token* out = &(tokens->back());
251 out->type = Token::Type::kNativeString;
252 out->begin = data_->GetCurrentPosition();
253
254 // Move forward until we find EOF or the end of the string.
255 while (!data_->CurrentCharIsEOF()) {
256 if (data_->CurrentCharIs('\\')) {
257 // This is an escape character.
258 data_->MoveToNext();
259 if (data_->CurrentCharIsEOF()) {
260 // It is an error: EOF after the escape character.
261 out->end = data_->GetCurrentPosition();
262 message_ = "Unexpected EOF after escape character (\\)";
263 return false;
264 }
265 // Add the escaped character to the string.
266 out->value.push_back(data_->GetCurrentCharacter());
267 // Go to the next character.
268 data_->MoveToNext();
269 continue;
270 }
271
272 // If the current character is not a ByteNative, we found the end of the
273 // string.
274 if (!data_->CurrentCharIsByteNative())
275 break;
276
277 // Save the current character and move to the next element.
278 out->value.push_back(data_->GetCurrentCharacter());
279 data_->MoveToNext();
280 }
281
282 // We are at EOF or at the first character not being part of the string.
283 out->end = data_->GetCurrentPosition();
284 return true;
285}
286
287bool Scanner::ParseWholeInput(std::vector<Token>* tokens) {
288 DCHECK(tokens != nullptr);
289
290 while (!data_->CurrentCharIsEOF()) {
291 // Check for different types of string.
292 if (data_->CurrentCharIs('\'')) {
293 if (!ParseLiteralString(tokens))
294 return false;
295 continue;
296 }
297 if (data_->CurrentCharIs('"')) {
298 if (!ParseInterpretedString(tokens))
299 return false;
300 continue;
301 }
302 if (data_->CurrentCharIs('`')) {
303 if (!ParseExecutedString(tokens))
304 return false;
305 continue;
306 }
307 if (data_->CurrentCharIsByteNative() || data_->CurrentCharIs('\\')) {
308 if (!ParseNativeString(tokens))
309 return false;
310 continue;
311 }
312
313 // Create a new token.
314 tokens->resize(tokens->size() + 1);
315 Token& token = tokens->back();
316
317 if (data_->CurrentCharIsOneOf(" \t")) {
318 // It is a Space token.
Piotr Pawliczek7bb6e512020-07-13 10:59:36 -0700319 // Space = { " " | Tabulator }- ;
Piotr Pawliczekcdd921f2020-05-06 17:35:58 -0700320 token.type = Token::Type::kSpace;
321 token.begin = data_->GetCurrentPosition();
322 // Move forward until we find the first character not being part of
323 // the Space token. It stops also at EOF.
324 while (data_->CurrentCharIsOneOf(" \t"))
325 data_->MoveToNext();
326 token.end = data_->GetCurrentPosition();
327 continue;
328 }
329
330 // Add a single character as a token.
331 token.type = Token::Type::kByte;
332 token.begin = data_->GetCurrentPosition();
333 data_->MoveToNext();
334 token.end = data_->GetCurrentPosition();
335 token.value.assign(token.begin, token.end);
336 }
337
338 // Add a special EOF token at the end.
339 tokens->resize(tokens->size() + 1);
340 tokens->back().type = Token::Type::kEOF;
341 tokens->back().begin = tokens->back().end = data_->GetCurrentPosition();
342 return true;
343}
344
345std::string::const_iterator Scanner::GetPosition() const {
346 return data_->GetCurrentPosition();
347}
348
349} // namespace foomatic_shell