1use crate::errors::{Result, RunjucksError};
18
/// Delimiter strings the lexer searches for when splitting a template.
///
/// Every field is the literal text of one delimiter; the lexer compares them
/// against the input with plain prefix matching.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Tags {
    /// Text that opens a block tag.
    pub block_start: String,
    /// Text that closes a block tag.
    pub block_end: String,
    /// Text that opens a variable expression.
    pub variable_start: String,
    /// Text that closes a variable expression.
    pub variable_end: String,
    /// Text that opens a comment.
    pub comment_start: String,
    /// Text that closes a comment.
    pub comment_end: String,
}
29
30impl Default for Tags {
31 fn default() -> Self {
32 Self {
33 block_start: "{%".into(),
34 block_end: "%}".into(),
35 variable_start: "{{".into(),
36 variable_end: "}}".into(),
37 comment_start: "{#".into(),
38 comment_end: "#}".into(),
39 }
40 }
41}
42
43#[derive(Clone, Debug, Default)]
47pub struct LexerOptions {
48 pub trim_blocks: bool,
50 pub lstrip_blocks: bool,
53 pub tags: Option<Tags>,
55}
56
/// Which kind of opening delimiter was found in the input.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum OpenKind {
    /// A comment opener.
    Comment,
    /// A block-tag opener; `trim_open` is true for the `-` suffixed form.
    Tag { trim_open: bool },
    /// A variable opener; `trim_open` is true for the `-` suffixed form.
    Var { trim_open: bool },
}
63
64fn next_opener(rest: &str, tags: &Tags) -> Option<(usize, OpenKind)> {
65 let bs = &tags.block_start;
66 let vs = &tags.variable_start;
67 let cs = &tags.comment_start;
68 let bs_trim = format!("{bs}-");
69
70 let vs_trim = format!("{vs}-");
71
72 let mut best: Option<(usize, OpenKind)> = None;
73 for (i, _) in rest.char_indices() {
74 let s = &rest[i..];
75 let candidate = if s.starts_with(cs.as_str()) {
76 Some((i, OpenKind::Comment))
77 } else if s.starts_with(bs_trim.as_str()) {
78 Some((i, OpenKind::Tag { trim_open: true }))
79 } else if s.starts_with(bs.as_str()) {
80 Some((i, OpenKind::Tag { trim_open: false }))
81 } else if s.starts_with(vs_trim.as_str()) {
82 Some((i, OpenKind::Var { trim_open: true }))
83 } else if s.starts_with(vs.as_str()) {
84 Some((i, OpenKind::Var { trim_open: false }))
85 } else {
86 None
87 };
88 if let Some((idx, kind)) = candidate {
89 best = match best {
90 None => Some((idx, kind)),
91 Some((bi, _)) if idx < bi => Some((idx, kind)),
92 Some(b) => Some(b),
93 };
94 }
95 }
96 best
97}
98
99fn parse_tag_prefix(rest: &str, tags: &Tags) -> Result<(String, usize, bool)> {
100 let bs = &tags.block_start;
101 let bs_trim = format!("{bs}-");
102 let open_len = if rest.starts_with(bs_trim.as_str()) {
103 bs_trim.len()
104 } else if rest.starts_with(bs.as_str()) {
105 bs.len()
106 } else {
107 return Err(RunjucksError::new(format!(
108 "internal lexer error: expected `{bs}`"
109 )));
110 };
111 let after_open = &rest[open_len..];
112 let (body_end, close_len) = find_tag_close(after_open, &tags.block_end)?;
113 let trim_close_marker = format!("-{}", tags.block_end);
114 let trim_close = after_open[body_end..].starts_with(trim_close_marker.as_str());
115 let body = after_open[..body_end].trim().to_string();
116 let total = open_len + body_end + close_len;
117 Ok((body, total, trim_close))
118}
119
120fn find_matching_block_close(
126 rest: &str,
127 open_name: &str,
128 end_name: &str,
129 tags: &Tags,
130) -> Result<usize> {
131 let bs = &tags.block_start;
132 let bs_trim = format!("{bs}-");
133 let open_prefix = format!("{open_name} ");
134 let end_prefix = format!("{end_name} ");
135 let mut pos = 0usize;
136 let mut level = 1usize;
137 while pos < rest.len() {
138 let slice = &rest[pos..];
139 if !slice.starts_with(bs.as_str()) && !slice.starts_with(bs_trim.as_str()) {
140 let adv = slice.chars().next().map(|c| c.len_utf8()).unwrap_or(1);
141 pos += adv;
142 continue;
143 }
144 let tag_start = pos;
145 let (body, total, _) = match parse_tag_prefix(slice, tags) {
146 Ok(t) => t,
147 Err(_) => {
148 pos += slice.chars().next().map(|c| c.len_utf8()).unwrap_or(1);
149 continue;
150 }
151 };
152 if body.contains(bs.as_str()) {
153 pos += slice.chars().next().map(|c| c.len_utf8()).unwrap_or(1);
154 continue;
155 }
156 let is_open = body == open_name || body.starts_with(&open_prefix);
157 let is_close = body == end_name || body.starts_with(&end_prefix);
158 if is_open {
159 level += 1;
160 } else if is_close {
161 level = level.saturating_sub(1);
162 if level == 0 {
163 return Ok(tag_start);
164 }
165 }
166 pos = tag_start + total;
167 }
168 Err(RunjucksError::new(format!(
169 "unclosed {end_name} block: expected matching `{}` tag",
170 tags.block_end
171 )))
172}
173
/// Scanner state used while searching for a closing delimiter.
#[derive(Copy, Clone, Eq, PartialEq)]
enum StringScan {
    /// Outside any string literal; closing delimiters are recognised here.
    Code,
    /// Inside a string literal.
    String,
    /// Inside a string literal, directly after a backslash.
    StringEscape,
}
180
181fn find_var_close(after_open: &str, tags: &Tags) -> Result<(usize, usize)> {
182 let ve = &tags.variable_end;
183 let vs = &tags.variable_start;
184 let trim_close = format!("-{ve}");
185 let mut state = StringScan::Code;
186 let mut i = 0usize;
187 while i < after_open.len() {
188 match state {
189 StringScan::StringEscape => {
190 let c = after_open[i..].chars().next().unwrap();
191 state = StringScan::String;
192 i += c.len_utf8();
193 }
194 StringScan::String => {
195 let rest = &after_open[i..];
196 let c = rest.chars().next().unwrap();
197 if c == '\\' {
198 state = StringScan::StringEscape;
199 } else if c == '"' {
200 state = StringScan::Code;
201 }
202 i += c.len_utf8();
203 }
204 StringScan::Code => {
205 let rest = &after_open[i..];
206 if rest.starts_with(trim_close.as_str()) {
207 return Ok((i, trim_close.len()));
208 }
209 if rest.starts_with(ve.as_str()) {
210 return Ok((i, ve.len()));
211 }
212 if rest.starts_with(vs.as_str()) {
213 return Err(RunjucksError::new(format!(
214 "nested `{vs}` inside a variable expression is not allowed"
215 )));
216 }
217 if rest.starts_with('"') {
218 state = StringScan::String;
219 i += 1;
220 continue;
221 }
222 let c = rest.chars().next().unwrap();
223 i += c.len_utf8();
224 }
225 }
226 }
227 Err(RunjucksError::new(format!(
228 "unclosed variable tag: expected `{ve}` or `-{ve}` after `{vs}`"
229 )))
230}
231
232fn find_tag_close(after_open: &str, block_end: &str) -> Result<(usize, usize)> {
233 let trim_close = format!("-{block_end}");
234 let mut state = StringScan::Code;
235 let mut i = 0usize;
236 while i < after_open.len() {
237 match state {
238 StringScan::StringEscape => {
239 let c = after_open[i..].chars().next().unwrap();
240 state = StringScan::String;
241 i += c.len_utf8();
242 }
243 StringScan::String => {
244 let rest = &after_open[i..];
245 let c = rest.chars().next().unwrap();
246 if c == '\\' {
247 state = StringScan::StringEscape;
248 } else if c == '"' {
249 state = StringScan::Code;
250 }
251 i += c.len_utf8();
252 }
253 StringScan::Code => {
254 let rest = &after_open[i..];
255 if rest.starts_with(trim_close.as_str()) {
256 return Ok((i, trim_close.len()));
257 }
258 if rest.starts_with(block_end) {
259 return Ok((i, block_end.len()));
260 }
261 if rest.starts_with('"') {
262 state = StringScan::String;
263 i += 1;
264 continue;
265 }
266 let c = rest.chars().next().unwrap();
267 i += c.len_utf8();
268 }
269 }
270 }
271 Err(RunjucksError::new(format!(
272 "unclosed template tag: expected `{block_end}` or `-{block_end}` after block start"
273 )))
274}
275
/// Trims a variable expression body according to its dash markers.
///
/// Leading whitespace is removed when `trim_open` is set and trailing
/// whitespace when `trim_close` is set; otherwise that side is left untouched.
fn apply_var_trim(body: &str, trim_open: bool, trim_close: bool) -> String {
    let trimmed = match (trim_open, trim_close) {
        (true, true) => body.trim(),
        (true, false) => body.trim_start(),
        (false, true) => body.trim_end(),
        (false, false) => body,
    };
    trimmed.to_owned()
}
286
/// One lexical unit of a template.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Token {
    /// Literal template text outside any delimiter.
    Text(String),
    /// The content found between the variable delimiters.
    Expression(String),
    /// The content of a block tag, with surrounding whitespace removed.
    Tag(String),
}
294
/// Scanning mode of the lexer.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum LexerMode {
    /// Ordinary scanning for comments, block tags, and variables.
    Normal,
    /// Inside a `raw` block: content is literal until the matching `endraw`.
    Raw,
    /// Inside a `verbatim` block: content is literal until the matching
    /// `endverbatim`.
    Verbatim,
}
301
302#[derive(Debug, Clone)]
304pub struct Lexer<'a> {
305 input: &'a str,
306 position: usize,
307 mode: LexerMode,
308 pending: Option<Token>,
309 strip_leading_next_text: bool,
311 opts: LexerOptions,
312 tags: Tags,
313 trim_block_newline: bool,
315}
316
317impl<'a> Lexer<'a> {
318 pub fn new(input: &'a str) -> Self {
319 Self::with_options(input, LexerOptions::default())
320 }
321
322 pub fn with_options(input: &'a str, opts: LexerOptions) -> Self {
323 let tags = opts.tags.clone().unwrap_or_default();
324 Self {
325 input,
326 position: 0,
327 mode: LexerMode::Normal,
328 pending: None,
329 strip_leading_next_text: false,
330 opts,
331 tags,
332 trim_block_newline: false,
333 }
334 }
335
336 #[inline]
337 pub fn rest(&self) -> &'a str {
338 self.input.get(self.position..).unwrap_or("")
339 }
340
341 #[inline]
342 pub fn is_eof(&self) -> bool {
343 self.position >= self.input.len()
344 }
345
346 fn skip_comment(&mut self) -> Result<()> {
347 let cs = &self.tags.comment_start;
348 let ce = &self.tags.comment_end;
349 let rest = self.rest();
350 if !rest.starts_with(cs.as_str()) {
351 return Err(RunjucksError::new(format!(
352 "internal lexer error: expected `{cs}`"
353 )));
354 }
355 let Some(end_rel) = rest.find(ce.as_str()) else {
356 return Err(RunjucksError::new(format!(
357 "unclosed comment: expected `{ce}` after `{cs}`"
358 )));
359 };
360 self.position += end_rel + ce.len();
361 Ok(())
362 }
363
364 fn consume_variable(&mut self, trim_open: bool) -> Result<Token> {
365 let vs = &self.tags.variable_start;
366 let vs_trim = format!("{vs}-");
367 let rest = self.rest();
368 let open_len = if rest.starts_with(vs_trim.as_str()) {
369 vs_trim.len()
370 } else {
371 vs.len()
372 };
373 self.position += open_len;
374 let after_open = self.rest();
375 let (body_end, close_len) = find_var_close(after_open, &self.tags)?;
376 let trim_close_marker = format!("-{}", self.tags.variable_end);
377 let trim_close = after_open[body_end..].starts_with(trim_close_marker.as_str());
378 let body = &after_open[..body_end];
379 let expr = apply_var_trim(body, trim_open, trim_close);
380 self.position += body_end + close_len;
381 if trim_close {
382 self.strip_leading_next_text = true;
383 }
384 Ok(Token::Expression(expr))
385 }
386
387 fn consume_tag_at_position(&mut self) -> Result<String> {
388 let rest = self.rest();
389 let (body, total, trim_close) = parse_tag_prefix(rest, &self.tags)?;
390 self.position += total;
391 if trim_close {
392 self.strip_leading_next_text = true;
393 } else if self.opts.trim_blocks {
394 self.trim_block_newline = true;
395 }
396 Ok(body)
397 }
398
399 fn end_tag_name(mode: LexerMode) -> &'static str {
400 match mode {
401 LexerMode::Raw => "endraw",
402 LexerMode::Verbatim => "endverbatim",
403 LexerMode::Normal => "",
404 }
405 }
406
407 fn open_tag_name(mode: LexerMode) -> &'static str {
408 match mode {
409 LexerMode::Raw => "raw",
410 LexerMode::Verbatim => "verbatim",
411 LexerMode::Normal => "",
412 }
413 }
414
415 fn next_token_block_mode(&mut self) -> Result<Option<Token>> {
416 let mode = self.mode;
417 let open_name = Self::open_tag_name(mode);
418 let end_name = Self::end_tag_name(mode);
419 let rest = self.rest();
420 let idx = find_matching_block_close(rest, open_name, end_name, &self.tags)?;
421 let mut literal = rest[..idx].to_string();
422 self.apply_leading_strip(&mut literal);
425 self.position += idx;
426 let rest2 = self.rest();
427 let (body, total, trim_close) = parse_tag_prefix(rest2, &self.tags)?;
428 self.position += total;
429 if trim_close {
430 self.strip_leading_next_text = true;
431 }
432 self.mode = LexerMode::Normal;
433 if !literal.is_empty() {
434 self.pending = Some(Token::Tag(body));
435 return Ok(Some(Token::Text(literal)));
436 }
437 Ok(Some(Token::Tag(body)))
438 }
439
440 fn apply_leading_strip(&mut self, text: &mut String) {
442 if self.strip_leading_next_text {
443 *text = text.trim_start().to_string();
444 self.strip_leading_next_text = false;
445 self.trim_block_newline = false;
446 } else if self.trim_block_newline {
447 if text.starts_with('\n') {
448 text.remove(0);
449 } else if text.starts_with("\r\n") {
450 text.drain(..2);
451 }
452 self.trim_block_newline = false;
453 }
454 }
455
456 fn apply_lstrip_trailing(&self, text: &mut String, kind: OpenKind) {
461 if !self.opts.lstrip_blocks {
462 return;
463 }
464 let is_block = matches!(kind, OpenKind::Tag { .. } | OpenKind::Comment);
465 if !is_block {
466 return;
467 }
468 if let Some(nl) = text.rfind('\n') {
469 let after_nl = &text[nl + 1..];
470 if after_nl.chars().all(|c| c == ' ' || c == '\t') {
471 text.truncate(nl + 1);
472 }
473 } else if text.chars().all(|c| c == ' ' || c == '\t') {
474 text.clear();
475 }
476 }
477
478 fn next_token_normal(&mut self) -> Result<Option<Token>> {
479 loop {
480 if self.is_eof() {
481 return Ok(None);
482 }
483
484 let rest = self.rest();
485
486 match next_opener(rest, &self.tags) {
487 None => {
488 let mut text = rest.to_owned();
489 self.apply_leading_strip(&mut text);
490 self.position = self.input.len();
491 return Ok(Some(Token::Text(text)));
492 }
493 Some((0, OpenKind::Comment)) => {
494 self.skip_comment()?;
495 continue;
496 }
497 Some((0, OpenKind::Tag { .. })) => {
498 let body = self.consume_tag_at_position()?;
499 if body == "raw" || body.starts_with("raw ") {
500 self.mode = LexerMode::Raw;
501 } else if body == "verbatim" || body.starts_with("verbatim ") {
502 self.mode = LexerMode::Verbatim;
503 }
504 return Ok(Some(Token::Tag(body)));
505 }
506 Some((0, OpenKind::Var { trim_open })) => {
507 return self.consume_variable(trim_open).map(Some);
508 }
509 Some((idx, kind)) => {
510 let mut text = rest[..idx].to_owned();
511 self.apply_leading_strip(&mut text);
512 let trim_open = matches!(
513 kind,
514 OpenKind::Tag { trim_open: true } | OpenKind::Var { trim_open: true }
515 );
516 if trim_open {
517 text = text.trim_end().to_string();
518 }
519 self.apply_lstrip_trailing(&mut text, kind);
520 self.position += idx;
521 if text.is_empty() {
522 continue;
523 }
524 return Ok(Some(Token::Text(text)));
525 }
526 }
527 }
528 }
529
530 pub fn next_token(&mut self) -> Result<Option<Token>> {
531 if let Some(t) = self.pending.take() {
532 return Ok(Some(t));
533 }
534 match self.mode {
535 LexerMode::Normal => self.next_token_normal(),
536 LexerMode::Raw | LexerMode::Verbatim => self.next_token_block_mode(),
537 }
538 }
539}
540
541pub fn tokenize(input: &str) -> Result<Vec<Token>> {
555 tokenize_with_options(input, LexerOptions::default())
556}
557
558pub fn tokenize_with_options(input: &str, opts: LexerOptions) -> Result<Vec<Token>> {
560 if input.is_empty() {
561 return Ok(vec![Token::Text(String::new())]);
562 }
563 let mut lexer = Lexer::with_options(input, opts);
564 let est = (input.len() / 24).saturating_add(4).min(8192);
567 let mut tokens = Vec::with_capacity(est);
568 while let Some(t) = lexer.next_token()? {
569 tokens.push(t);
570 }
571 Ok(tokens)
572}