Erebot  latest
A modular IRC bot for PHP 5.3+
Lexer.php
1 <?php
2 /*
3  This file is part of Erebot, a modular IRC bot written in PHP.
4 
5  Copyright © 2010 François Poirotte
6 
7  Erebot is free software: you can redistribute it and/or modify
8  it under the terms of the GNU General Public License as published by
9  the Free Software Foundation, either version 3 of the License, or
10  (at your option) any later version.
11 
12  Erebot is distributed in the hope that it will be useful,
13  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  GNU General Public License for more details.
16 
17  You should have received a copy of the GNU General Public License
18  along with Erebot. If not, see <http://www.gnu.org/licenses/>.
19 */
20 
21 namespace Erebot\Styling;
22 
/**
 * A lexer (tokenizer) for variables used in styling templates.
 *
 * The formula is tokenized as soon as the lexer is constructed:
 * every token found is pushed to an internal parser, whose result
 * can then be retrieved with getResult().
 */
class Lexer
{
    /// Formula to be tokenized.
    protected $formula;

    /// Length of the formula (in bytes, as returned by strlen()).
    protected $length;

    /// Current position (byte offset) in the formula.
    protected $position;

    /// Parser for the formula.
    protected $parser;


    /// Matches an unsigned integer literal such as "1234".
    const PATT_INTEGER  = '/^[0-9]+/';

    /// Matches a real-number literal such as "1.23", "1." or ".23".
    const PATT_REAL     = '/^[0-9]*\.[0-9]+|^[0-9]+\.[0-9]*/';

    /// Pattern for variable names (letters, digits, "_" and ".").
    const PATT_VAR_NAME = '/^[a-zA-Z0-9_\\.]+/';


    /**
     * Builds a new lexer and immediately tokenizes the given formula.
     *
     * \param string $formula
     *      Formula to be tokenized.
     *
     * \param array $vars
     *      Variables handed over, untouched, to the parser's constructor.
     */
    public function __construct($formula, array $vars)
    {
        $this->formula  = $formula;
        $this->length   = strlen($formula);
        $this->position = 0;
        $this->parser   = new \Erebot\Styling\Parser($vars);
        $this->tokenize();
    }

    /**
     * Returns the result produced by the parser for the formula.
     *
     * \retval mixed
     *      Whatever the parser's own getResult() method returns.
     */
    public function getResult()
    {
        return $this->parser->getResult();
    }

    /**
     * This method does the actual work.
     *
     * Scans the formula from left to right and feeds the parser with
     * one token at a time (operators, numbers, variable names),
     * silently skipping spaces and tabs. Once the whole formula has
     * been consumed, the end of input is signalled to the parser with
     * the token (0, 0).
     */
    protected function tokenize()
    {
        $operators = array(
            '(' => \Erebot\Styling\Parser::TK_PAR_OPEN,
            ')' => \Erebot\Styling\Parser::TK_PAR_CLOSE,
            '+' => \Erebot\Styling\Parser::TK_OP_ADD,
            '-' => \Erebot\Styling\Parser::TK_OP_SUB,
            '#' => \Erebot\Styling\Parser::TK_OP_COUNT,
        );

        while ($this->position < $this->length) {
            $c = $this->formula[$this->position];

            // Operators ("(", ")", "+", "-" & "#").
            if (isset($operators[$c])) {
                $this->parser->doParse($operators[$c], $c);
                $this->position++;
                continue;
            }

            // Whitespace (space or tab) never produces a token.
            if ($c === ' ' || $c === "\t") {
                $this->position++;
                continue;
            }

            // The patterns are anchored with "^", so they must be applied
            // to the remainder of the formula. The copy is only made here,
            // once we know a single-character token did not match.
            $subject = substr($this->formula, $this->position);

            // Real numbers (eg. "3.14"). This must be tested before
            // integers, otherwise "3.14" would be split into "3" and ".14".
            if (preg_match(self::PATT_REAL, $subject, $matches)) {
                $this->position += strlen($matches[0]);
                $this->parser->doParse(
                    \Erebot\Styling\Parser::TK_NUMBER,
                    (float) $matches[0]
                );
                continue;
            }

            // Integers (eg. "42").
            if (preg_match(self::PATT_INTEGER, $subject, $matches)) {
                $this->position += strlen($matches[0]);
                $this->parser->doParse(
                    \Erebot\Styling\Parser::TK_NUMBER,
                    (int) $matches[0]
                );
                continue;
            }

            // Variable names.
            if (preg_match(self::PATT_VAR_NAME, $subject, $matches)) {
                $this->position += strlen($matches[0]);
                $this->parser->doParse(
                    \Erebot\Styling\Parser::TK_VARIABLE,
                    $matches[0]
                );
                continue;
            }

            // Unrecognized character: report it to the parser, which is
            // presumably expected to raise an exception (to be confirmed
            // against the parser's implementation).
            $this->parser->doParse(
                \Erebot\Styling\Parser::YY_ERROR_ACTION,
                $c
            );

            // Should the parser return instead of throwing, step over the
            // offending byte so that the loop is guaranteed to terminate.
            $this->position++;
        }

        // End of tokenization.
        $this->parser->doParse(0, 0);
    }
}
$length
Length of the formula.
Definition: Lexer.php:34
Provides styling (formatting) features.
Definition: Styling.php:113
Definition: CLI.php:21
__construct($formula, array $vars)
Definition: Lexer.php:63
const PATT_INTEGER
Matches an unsigned integer literal such as "1234".
Definition: Lexer.php:44
A lexer (tokenizer) for variables used in styling templates.
Definition: Lexer.php:28
$parser
Parser for the formula.
Definition: Lexer.php:40
const PATT_VAR_NAME
Pattern for variable names.
Definition: Lexer.php:50
const PATT_REAL
Matches a real-number literal such as "1.23", "1." or ".23".
Definition: Lexer.php:47
$position
Current position in the formula.
Definition: Lexer.php:37
$formula
Formula to be tokenized.
Definition: Lexer.php:31
tokenize()
This method does the actual work.
Definition: Lexer.php:84