Blame - foomatic_shell/scanner.cc - chromium.googlesource.com/chromiumos/platform2

blob: efeba33518a2fbfaba0aa855c53a00ebceec3f39 [file] [log] [blame]

Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	1	// Copyright 2020 The Chromium OS Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style license that can be
				3	// found in the LICENSE file.
				4
				5	#include "foomatic_shell/scanner.h"
				6
Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	7	#include <string>
				8	#include <vector>
				9
Qijiang Fan	713061e	2021-03-08 15:45:12 +0900	[diff] [blame]	10	#include <base/check.h>
Piotr Pawliczek	7bb6e51	2020-07-13 10:59:36 -0700	[diff] [blame]	11	#include <base/logging.h>
				12
Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	13	namespace foomatic_shell {
				14
				15	// This class encapsulates an iterator representing the current position in the
				16	// input string.
				17	class Scanner::Input {
				18	public:
				19	// Constructor. \|data\| is a reference to the input string. The input string
				20	// must remain constant and valid during the lifetime of the object. The
				21	// current position is set to the first element in the \|data\|.
				22	explicit Input(const std::string& data)
				23	: data_(data), current_(data_.begin()) {}
				24
				25	Input(const Input&) = delete;
				26	Input(Input&&) = delete;
				27
				28	// Returns the iterator to the current position. The iterator is from the
				29	// input string given in the constructor and is always valid (but may be
				30	// equal \|data.end()\|).
				31	std::string::const_iterator GetCurrentPosition() const { return current_; }
				32
				33	// Returns the value of the current character. If the current position is set
				34	// to \|data.end()\|, this method returns '\0'.
				35	char GetCurrentCharacter() const {
				36	if (current_ == data_.end())
				37	return '\0';
				38	return *current_;
				39	}
				40
				41	// Returns true <=> a character at the current position equals \|c\|. If the
				42	// current position is set to \|data.end()\|, it returns false.
				43	bool CurrentCharIs(char c) const {
				44	return (current_ != data_.end() && *current_ == c);
				45	}
				46
				47	// Returns true <=> a character at the current position is ByteNative (see
				48	// grammar.h for details). If the current position is set to \|data.end()\|,
				49	// it returns false.
				50	bool CurrentCharIsByteNative() const {
				51	if (current_ == data_.end())
				52	return false;
				53	if (current_ >= 'A' && current_ <= 'Z')
				54	return true;
				55	if (current_ >= 'a' && current_ <= 'z')
				56	return true;
				57	if (current_ >= '0' && current_ <= '9')
				58	return true;
				59	return (std::string("./_+-@%").find(*current_) != std::string::npos);
				60	}
				61
				62	// Returns true <=> a character at the current position equals to one of the
				63	// elements of \|chars\|. If the current position is set to \|data.end()\|,
				64	// it returns false.
				65	bool CurrentCharIsOneOf(const std::string& chars) const {
				66	if (current_ == data_.end())
				67	return false;
				68	return (chars.find(*current_) != std::string::npos);
				69	}
				70
				71	// Returns true <=> the current position is set to \|data.end()\|.
				72	bool CurrentCharIsEOF() const { return (current_ == data_.end()); }
				73
				74	// Move the current position to the next element. If the current position
				75	// is set to \|data.end()\|, it does nothing.
				76	void MoveToNext() {
				77	if (current_ != data_.end())
				78	++current_;
				79	}
				80
				81	private:
				82	const std::string& data_;
				83	std::string::const_iterator current_;
				84	};
				85
				86	Scanner::Scanner(const std::string& data)
				87	: data_(std::make_unique<Input>(data)) {}
				88	Scanner::~Scanner() {}
				89
				90	// Parses the following (see grammar.h for details):
Piotr Pawliczek	7bb6e51	2020-07-13 10:59:36 -0700	[diff] [blame]	91	// LiteralString = "'" , { ByteCommon \| '"' \| "`" \| "\" } , "'" ;
Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	92	// The current position must be one the opening '. It moves cursor to the first
				93	// character after the closing '. The resultant token is added to \|tokens\|.
				94	// \|tokens\| must not be nullptr. Returns false in case of an error.
				95	bool Scanner::ParseLiteralString(std::vector<Token>* tokens) {
				96	DCHECK(tokens != nullptr);
				97	DCHECK(data_->CurrentCharIs('\''));
				98
				99	// Skip the opening '.
				100	data_->MoveToNext();
				101
				102	// Create a new token.
				103	tokens->resize(tokens->size() + 1);
				104	Token* out = &(tokens->back());
				105	out->type = Token::Type::kLiteralString;
				106	out->begin = data_->GetCurrentPosition();
				107
				108	// Move forward until we find EOF or the closing '.
				109	while (!data_->CurrentCharIsEOF()) {
				110	if (data_->CurrentCharIs('\'')) {
				111	// The closing ' was found.
				112	out->end = data_->GetCurrentPosition();
				113	out->value.assign(out->begin, out->end);
				114	// Skip the closing '.
				115	data_->MoveToNext();
				116	// Success!
				117	return true;
				118	}
				119	data_->MoveToNext();
				120	}
				121
				122	// There is no closing '.
				123	out->end = data_->GetCurrentPosition();
				124	message_ = "Unexpected EOF when parsing '...' (literal string)";
				125	return false;
				126	}
				127
				128	// Parses the following (see grammar.h for details):
Piotr Pawliczek	7bb6e51	2020-07-13 10:59:36 -0700	[diff] [blame]	129	// ExecutedString = "`" , { ByteCommon \| "'" \| '"' \| ("\",ByteAny) } , "`" ;
Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	130	// The current position must be one the opening `. It moves cursor to the first
				131	// character after the closing `. The resultant token is added to \|tokens\|.
				132	// \|tokens\| must not be nullptr. Returns false in case of an error.
				133	bool Scanner::ParseExecutedString(std::vector<Token>* tokens) {
				134	DCHECK(tokens != nullptr);
				135	DCHECK(data_->CurrentCharIs('`'));
				136
				137	// Skip the opening `.
				138	data_->MoveToNext();
				139
				140	// Create a new token.
				141	tokens->resize(tokens->size() + 1);
				142	Token* out = &(tokens->back());
				143	out->type = Token::Type::kExecutedString;
				144	out->begin = data_->GetCurrentPosition();
				145
				146	// Move forward until we find EOF or the closing `.
				147	while (!data_->CurrentCharIsEOF()) {
				148	if (data_->CurrentCharIs('`')) {
				149	// The closing ` was found.
				150	out->end = data_->GetCurrentPosition();
				151	// Skip the closing `.
				152	data_->MoveToNext();
				153	// Success!
				154	return true;
				155	}
				156	// The escape character (\) works in ExecutedString for ByteAny.
				157	if (data_->CurrentCharIs('\\')) {
				158	data_->MoveToNext();
				159	if (data_->CurrentCharIsEOF())
				160	break;
				161	}
				162	// Save the current character and move to the next element.
				163	out->value.push_back(data_->GetCurrentCharacter());
				164	data_->MoveToNext();
				165	}
				166
				167	// There is no closing `.
				168	out->end = data_->GetCurrentPosition();
				169	message_ = "Unexpected EOF when parsing `...` (executed string)";
				170	return false;
				171	}
				172
				173	// Parses the following (see grammar.h for details):
Piotr Pawliczek	7bb6e51	2020-07-13 10:59:36 -0700	[diff] [blame]	174	// InterpretedString = '"' , { ByteCommon \| "'" \| "\" \| ("\",'"') \| ("\","`")
				175	// \| ("\","\") \| ExecutedString } , '"' ;
Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	176	// The current position must be one the opening ". It moves cursor to the first
				177	// character after the closing ". If the string contains one or more
				178	// ExecutedString, it is split into a sequence of consecutive tokens of types
				179	// InterpretedString and ExecutedString. The resultant tokens are added to
				180	// \|tokens\|. \|tokens\| must not be nullptr. Returns false in case of an error.
				181	bool Scanner::ParseInterpretedString(std::vector<Token>* tokens) {
				182	DCHECK(tokens != nullptr);
				183	DCHECK(data_->CurrentCharIs('"'));
				184
				185	// Skip the opening ".
				186	data_->MoveToNext();
				187
				188	// Create a sequence of alternating InterpretedString and ExecutedString
				189	// tokens.
				190	while (true) {
				191	// Create a new InterpretedString token.
				192	tokens->resize(tokens->size() + 1);
				193	Token* out = &(tokens->back());
				194	out->type = Token::Type::kInterpretedString;
				195	out->begin = data_->GetCurrentPosition();
				196
				197	// Move forward until we find EOF, the closing " or the opening `.
				198	while (true) {
				199	if (data_->CurrentCharIs('"')) {
				200	// The closing " was found.
				201	out->end = data_->GetCurrentPosition();
				202	data_->MoveToNext();
				203	return true;
				204	}
				205	if (data_->CurrentCharIs('`')) {
				206	// The opening ` was found. We finish the current token and
				207	// add a new ExecutedString token.
				208	out->end = data_->GetCurrentPosition();
				209	if (!ParseExecutedString(tokens))
				210	return false;
				211	// We break the internal loop to create a new InterpretedString
				212	// token.
				213	break;
				214	}
				215	if (data_->CurrentCharIs('\\')) {
				216	// It may be an escape character for " or `.
				217	data_->MoveToNext();
				218	if (data_->CurrentCharIsOneOf("\"`\\")) {
				219	// The next character is " or `. Just skip \ and go ahead.
				220	} else {
				221	// It was not an escape character. We have to add a skipped \.
				222	out->value.push_back('\\');
				223	}
				224	}
				225	if (data_->CurrentCharIsEOF()) {
				226	// There is no closing ".
				227	out->end = data_->GetCurrentPosition();
				228	message_ = "Unexpected EOF when parsing \"...\" (interpreted string)";
				229	return false;
				230	}
				231	// Save the current character and move to the next element.
				232	out->value.push_back(data_->GetCurrentCharacter());
				233	data_->MoveToNext();
				234	}
				235	}
				236	}
				237
				238	// Parses the following (see grammar.h for details):
Piotr Pawliczek	7bb6e51	2020-07-13 10:59:36 -0700	[diff] [blame]	239	// NativeString = { ByteNative \| ("\",ByteAny) }- ;
Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	240	// The current position must be one the first character of NativeString. It
				241	// moves cursor to the first character after the end of the string. The
				242	// resultant token is added to \|tokens\|. \|tokens\| must not be nullptr. Returns
				243	// false in case of an error.
				244	bool Scanner::ParseNativeString(std::vector<Token>* tokens) {
				245	DCHECK(tokens != nullptr);
				246	DCHECK(data_->CurrentCharIsByteNative() \|\| data_->CurrentCharIs('\\'));
				247
				248	// Create a new token.
				249	tokens->resize(tokens->size() + 1);
				250	Token* out = &(tokens->back());
				251	out->type = Token::Type::kNativeString;
				252	out->begin = data_->GetCurrentPosition();
				253
				254	// Move forward until we find EOF or the end of the string.
				255	while (!data_->CurrentCharIsEOF()) {
				256	if (data_->CurrentCharIs('\\')) {
				257	// This is an escape character.
				258	data_->MoveToNext();
				259	if (data_->CurrentCharIsEOF()) {
				260	// It is an error: EOF after the escape character.
				261	out->end = data_->GetCurrentPosition();
				262	message_ = "Unexpected EOF after escape character (\\)";
				263	return false;
				264	}
				265	// Add the escaped character to the string.
				266	out->value.push_back(data_->GetCurrentCharacter());
				267	// Go to the next character.
				268	data_->MoveToNext();
				269	continue;
				270	}
				271
				272	// If the current character is not a ByteNative, we found the end of the
				273	// string.
				274	if (!data_->CurrentCharIsByteNative())
				275	break;
				276
				277	// Save the current character and move to the next element.
				278	out->value.push_back(data_->GetCurrentCharacter());
				279	data_->MoveToNext();
				280	}
				281
				282	// We are at EOF or at the first character not being part of the string.
				283	out->end = data_->GetCurrentPosition();
				284	return true;
				285	}
				286
				287	bool Scanner::ParseWholeInput(std::vector<Token>* tokens) {
				288	DCHECK(tokens != nullptr);
				289
				290	while (!data_->CurrentCharIsEOF()) {
				291	// Check for different types of string.
				292	if (data_->CurrentCharIs('\'')) {
				293	if (!ParseLiteralString(tokens))
				294	return false;
				295	continue;
				296	}
				297	if (data_->CurrentCharIs('"')) {
				298	if (!ParseInterpretedString(tokens))
				299	return false;
				300	continue;
				301	}
				302	if (data_->CurrentCharIs('`')) {
				303	if (!ParseExecutedString(tokens))
				304	return false;
				305	continue;
				306	}
				307	if (data_->CurrentCharIsByteNative() \|\| data_->CurrentCharIs('\\')) {
				308	if (!ParseNativeString(tokens))
				309	return false;
				310	continue;
				311	}
				312
				313	// Create a new token.
				314	tokens->resize(tokens->size() + 1);
				315	Token& token = tokens->back();
				316
				317	if (data_->CurrentCharIsOneOf(" \t")) {
				318	// It is a Space token.
Piotr Pawliczek	7bb6e51	2020-07-13 10:59:36 -0700	[diff] [blame]	319	// Space = { " " \| Tabulator }- ;
Piotr Pawliczek	cdd921f	2020-05-06 17:35:58 -0700	[diff] [blame]	320	token.type = Token::Type::kSpace;
				321	token.begin = data_->GetCurrentPosition();
				322	// Move forward until we find the first character not being part of
				323	// the Space token. It stops also at EOF.
				324	while (data_->CurrentCharIsOneOf(" \t"))
				325	data_->MoveToNext();
				326	token.end = data_->GetCurrentPosition();
				327	continue;
				328	}
				329
				330	// Add a single character as a token.
				331	token.type = Token::Type::kByte;
				332	token.begin = data_->GetCurrentPosition();
				333	data_->MoveToNext();
				334	token.end = data_->GetCurrentPosition();
				335	token.value.assign(token.begin, token.end);
				336	}
				337
				338	// Add a special EOF token at the end.
				339	tokens->resize(tokens->size() + 1);
				340	tokens->back().type = Token::Type::kEOF;
				341	tokens->back().begin = tokens->back().end = data_->GetCurrentPosition();
				342	return true;
				343	}
				344
				345	std::string::const_iterator Scanner::GetPosition() const {
				346	return data_->GetCurrentPosition();
				347	}
				348
				349	} // namespace foomatic_shell