I've written the second iteration of my math expression parser, utilising what I learned from the first attempt to make a more reliable, maintainable piece of code.
If anybody wants to see the first iteration to see my progress, seehere.
With the first version, I tried to evaluate the expression string directly, without tokenising it and running it through several levels of progressive processing. It turned into a hard-to-follow mess of boolean flags andif-else blocks.
With the second version, I split the process up into various stages:
- I replace the variables with their stored values.
- I look for parentheses and parse those recursively, replacing the range of tokens the brackets spanned with their evaluated values.
- I parse for negative values, making the proceeding token's value either positive or negative, and removing the operator token.
- I parse for function calls, and replace their token ranges with the resulting value.
- I parse the arithmetic operators in the standard order of operations, deducing the value on each round until there is only one value left. The value is then returned.
const NUMERIC_CHARSET = '01234567890.', ALPHA_CHARSET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_', OPERATOR_CHARSET = '+-/*^%', WHITE_SPACE_REGEX = /\s/;const MathFunctions = { sin: radians => Math.sin(radians), cos: radians => Math.cos(radians), tan: radians => Math.tan(radians), fact: value => { var iter, multiplier; for(multiplier = value - 1; multiplier > 0; --multiplier) { value *= multiplier; } return value; }, exp: value => Math.exp(value), sqrt: value => Math.sqrt(value), ceil: value => Math.ceil(value), floor: value => Math.floor(value), abs: value => Math.abs(value), acos: value => Math.acos(value), asin: value => Math.asin(value), atan: value => Math.atan(value), log: value => Math.log(value), round: value => Math.round(value)};const Helpers = { isNumeric: char => NUMERIC_CHARSET.indexOf(char) !== -1, isAlpha: char => ALPHA_CHARSET.indexOf(char) !== -1, isOperator: char => OPERATOR_CHARSET.indexOf(char) !== -1, isMathFunction: keyword => typeof MathFunctions[keyword] === 'function', isWhitespace: char => WHITE_SPACE_REGEX.test(char), radians: angle => angle * Math.PI / 180};const OperatorFunctions = { '+': (left, right) => left + right, '-': (left, right) => left - right, '/': (left, right) => left / right, '*': (left, right) => left * right, '%': (left, right) => left % right, '^': (left, right) => Math.pow(left, right)};function ExpressionParser() { 'use strict'; this.variables = { pi: Math.PI, PI: Math.PI, e: Math.E, E: Math.E, rand: () => Math.random() }; this.readOnlyVariables = [ ]; for(var varName in this.variables) { this.readOnlyVariables.push(varName); }};/* Sets a variable */ExpressionParser.prototype.setVariable = function(name, value) { 'use strict'; if(this.readOnlyVariables.indexOf(name) !== -1) { throw new Error('Error: Cannot set read only variable "' + name + '"'); } this.variables[name] = value;};/* Gets a variable */ExpressionParser.prototype.getVariable = function(name) { 'use strict'; if(this.isVariable(name)) { var variable = this.variables[name]; if(typeof variable === 'function') { return variable(); } return variable; }};/* Checks if a variable exists */ExpressionParser.prototype.isVariable = function(name) { 'use strict'; return this.variables.hasOwnProperty(name);};/* Parse an expression */ExpressionParser.prototype.parse = function(expression) { 'use strict'; var tokens = this.tokenize(expression); tokens = this.parseTokens(tokens); var tokensLength = tokens.length, iter, value = null, last_number = null, flag_assignment = false; for(iter = 0; iter < tokensLength; ++iter) { // Get the value if(tokens[iter][0] === 'number') { value = tokens[iter][1]; } if(tokens[iter][0] === 'assignment') { if( iter - 1 === 0 && // Check there is a keyword previous iter + 1 < tokensLength && // Check there is a value to set next tokens[iter - 1][0] === 'keyword' ) { flag_assignment = true; } else { throw new Error('Error: Unexpected assignment'); } } } if(flag_assignment) { this.setVariable(tokens[0][1], value); } return value;};/* Parse tokens */ExpressionParser.prototype.parseTokens = function(tokens) { 'use strict'; tokens = this.parseVariables(tokens); tokens = this.parseBrackets(tokens); tokens = this.parseNegatives(tokens); tokens = this.parseFunctions(tokens); tokens = this.parseOperations(tokens); return tokens;};ExpressionParser.prototype.parseBrackets = function(tokens) { 'use strict'; var tokensLength = tokens.length, bracketDepth = 0, bracketIndex = 0, iter; for(iter = 0; iter < tokensLength; ++iter) { if(tokens[iter][0] === 'bracket') { if(bracketDepth > 0) { if(tokens[iter][1] === ')') { --bracketDepth; } if(bracketDepth === 0) { let leftSide = tokens.slice(0, bracketIndex), parsed = this.parseTokens(tokens.slice(bracketIndex + 1, iter)), rightSide = tokens.slice(iter + 1); tokens = leftSide.concat(parsed, rightSide); iter += tokens.length - tokensLength; tokensLength = tokens.length; } } if(tokens[iter][1] === '(') { if(bracketDepth === 0) { bracketIndex = iter; } ++bracketDepth; } } } return tokens;};ExpressionParser.prototype.parseNegatives = function(tokens) { 'use strict'; var tokensLength = tokens.length, iter; for(iter = 0; iter < tokensLength; ++iter) { // Logic for a negative number if( tokens[iter][0] === 'operator' && ( tokens[iter][1] === '-' || // Check it's a minus symbol tokens[iter][1] === '+' // Or a plus symbold ) && ( iter - 1 < 0 || // Either there is no previous token... tokens[iter - 1][0] !== 'number' // Or it's not a number ) && iter + 1 < tokensLength && // Check there is a proceeding token tokens[iter + 1][0] === 'number' // And it's a number ) { // Make the next number a negative tokens[iter + 1][1] = tokens[iter][1] === '-' ? -tokens[iter + 1][1] : tokens[iter + 1][1]; // Remove this token from stack tokens.splice(iter, 1); --tokensLength; --iter; continue; } } return tokens;};ExpressionParser.prototype.parseVariables = function(tokens) { 'use strict'; var tokensLength = tokens.length, iter; for(iter = 0; iter < tokensLength; ++iter) { if(tokens[iter][0] === 'keyword') { if( !Helpers.isMathFunction(tokens[iter][1]) && // Check it's not a function ( iter === tokensLength - 1 || // Either this is the last token tokens[iter + 1][0] !== 'assignment' // Or the next token is not an assignment ) ) { // Check variable exists if(this.isVariable(tokens[iter][1])) { tokens[iter][0] = 'number'; tokens[iter][1] = this.getVariable(tokens[iter][1]); continue; } else { throw new Error('Error: Undefined variable "' + tokens[iter][1] + '"'); } } } } return tokens;};ExpressionParser.prototype.parseFunctions = function(tokens) { 'use strict'; var tokensLength = tokens.length, iter; for(iter = 0; iter < tokensLength; ++iter) { if(tokens[iter][0] === 'keyword' && tokens[iter][1] in MathFunctions) { if( iter + 1 < tokensLength && // Check this is not the last token tokens[iter + 1][0] === 'number' // And the last next token is a number ) { // Apply math function tokens[iter + 1][1] = MathFunctions[tokens[iter][1]](tokens[iter + 1][1]); // Remove this token from stack tokens.splice(iter, 1); --tokensLength; --iter; } else { throw new Error('Error: unexpected function "' + tokens[iter][1] + '"'); } } } return tokens;};ExpressionParser.prototype.parseOperations = function(tokens) { 'use strict'; // Order of operations var operators = ['^', '*', '/', '+', '-']; operators.forEach(operator => { tokens = this.parseOperator(tokens, operator); }); return tokens;};ExpressionParser.prototype.parseOperator = function(tokens, operator) { 'use strict'; var tokensLength = tokens.length, iter; for(iter = 0; iter < tokensLength; ++iter) { var token = tokens[iter], token_type = token[0], token_value = token[1]; if(token_type === 'operator' && token_value === operator) { if( iter - 1 >= 0 && // Check there is a previous token iter + 1 < tokensLength && // Check there is a next token tokens[iter - 1][0] === 'number' && // Check the previous token is a number tokens[iter + 1][0] === 'number' // Check the next token is a number ) { tokens[iter + 1][1] = OperatorFunctions[token_value](tokens[iter - 1][1], tokens[iter + 1][1]); let leftSide = tokens.slice(0, iter - 1), rightSide = tokens.slice(iter + 1); // Replace sub-expression with the result value tokens = leftSide.concat(rightSide); iter += tokens.length - tokensLength; tokensLength = tokens.length; continue; } else { throw new Error('Error: unexpected operator "' + tokens[iter][1] + '"'); } } } return tokens;};/** * Split expression into tokens */ExpressionParser.prototype.tokenize = function(expression) { 'use strict'; // Append space so that the last character before that space is tokenised expression += ' '; // TOKENIZER VARS var expressionLength = expression.length, iter, tokens = [ ], buffer = ''; // FLAGS var flag_numeric = false, flag_keyword = false, flag_operator = false; // Iterate through expression for(iter = 0; iter < expressionLength; ++iter) { var char = expression.charAt(iter), char_isNumeric = Helpers.isNumeric(char), char_isOperator = Helpers.isOperator(char), char_isAlpha = Helpers.isAlpha(char); if(flag_keyword) { // We've reached the end of the keyword if(!char_isAlpha) { flag_keyword = false; tokens.push(['keyword', buffer]); buffer = ''; } } if(flag_numeric) { // We've reached the end of the number if(!char_isNumeric) { // Skip char if comma, so we can allow for formatted numbers if(char === ',' && iter + 1 < expressionLength && Helpers.isNumeric(expression[iter + 1])) { continue; } let parsingFunction = parseInt; if(buffer.indexOf('.') !== -1) { // Check for float parsingFunction = parseFloat; } tokens.push(['number', parsingFunction(buffer, 10)]); flag_numeric = false; buffer = ''; } } if(char_isNumeric) { // Check for a number flag_numeric = true; buffer += char; } else if(char_isAlpha) { // Check for a keyword flag_keyword = true; buffer += char; } else if(char_isOperator) { // Check for an operator tokens.push(['operator', char]); } else if(char === '(') { // Check for parentheses tokens.push(['bracket', '(']); } else if(char === ')') { // Check for closing parentheses tokens.push(['bracket', ')']); } else if(char === '=') { // Check for assignment tokens.push(['assignment', char]); } else if(!Helpers.isWhitespace(char)) { // Check for whitespace throw new Error('Error: Unexpected char "' + char + '"'); } } return tokens;};var ep = new ExpressionParser();process.stdin.resume();process.stdin.setEncoding('utf8');process.stdin.on('data', function(expression) { try { var result = ep.parse(expression); if(result !== false) { console.log(result) } } catch(e) { console.log(e.message); } process.stdout.write("> ");});console.log("Welcome to math2.js:");process.stdout.write("> ");Here is the console log for the tests I did:
$ node math2.jsWelcome to math2.js:> value = 11 + (exp(2.010635 + sin(PI/2)*3) + 50) / 2110.9999779427841> c = 3*10^8300000000> c*value33299993382.835228> sin pi1.2246467991473532e-16> sin (pi)1.2246467991473532e-16> fact 5120> fact(5 + 1)720> fact(c)Infinity> 22*-.5-11> 2,000 + 4,0006000> ,2000Error: Unexpected char ","> pi sinError: unexpected function "sin"> sin pi1.2246467991473532e-16> E^27.3890560989306495
I would like to know if there are things I could have done differently that would result in a more efficient process.
- \$\begingroup\$This is a lot of a code. If you don't get any answers, consider posting a smaller, self-contained snippet of this code, if that's possible...\$\endgroup\$Jonah– Jonah2016-06-18 18:26:22 +00:00CommentedJun 18, 2016 at 18:26
- 1\$\begingroup\$I completely understand that. Buts it's not really the kind of things that can be broken up into smaller snippets. I'm not looking for a complete in-depth review of every line, just for a more experienced programmer to maybe look it over and see if the general concept of how I'm going about the whole things is OK. I won't be surprised if nobody does, but I can at least ask.\$\endgroup\$thephpdev– thephpdev2016-06-18 18:38:19 +00:00CommentedJun 18, 2016 at 18:38
- \$\begingroup\$Well, not an answer to your question, since parsers and compilers aren't a subject I know too much about, but you might be interested in reading about "the expression problem". Here is agood lecture on it\$\endgroup\$Jonah– Jonah2016-06-18 18:51:19 +00:00CommentedJun 18, 2016 at 18:51
- \$\begingroup\$I have rolled back the last edit. Please seewhat you may and may not do after receiving answers.\$\endgroup\$Vogel612– Vogel6122016-06-18 21:31:00 +00:00CommentedJun 18, 2016 at 21:31
- \$\begingroup\$Is there a particular reason you're enabling strict mode only inside some functions?\$\endgroup\$gcampbell– gcampbell2016-06-26 11:16:21 +00:00CommentedJun 26, 2016 at 11:16
1 Answer1
One common theme I notice in the code is that you alter existing variables. For instancevalue in this function:
fact: value => { var iter, multiplier; for(multiplier = value - 1; multiplier > 0; --multiplier) { value *= multiplier; } return value;},There's a rule in ESLint calledno-param-reassign which will consider this an error if enabled. There'sa linked article with regards to why this is harmful, why that ESLint rule exists, and why you shouldn't be doing this. In a gist, altering the value of an argument alters thearguments object itself.
this.variables = { pi: Math.PI, PI: Math.PI, e: Math.E, E: Math.E, rand: () => Math.random()};I wonder why these aren't extracted to its own "enum", unless this is something unique to the expression parser.
this.readOnlyVariables = [ ];for(var varName in this.variables) { this.readOnlyVariables.push(varName);}Shorter:
Object.keys(this.variables).map(varName => varName);/* Gets a variable */ExpressionParser.prototype.getVariable = function(name) { 'use strict'; if(this.isVariable(name)) { var variable = this.variables[name]; if(typeof variable === 'function') { return variable(); } return variable; }};This code implies that there may be a case wherethis.isVariable(name) isfalse however there is noreturn for a falsy case. This can make this function a bit ambiguous especially when it is namedgetVariable. It implies that the function returns something. Ifundefined is a valid value, make it explicit in the code.
I can't speak much about the parser, although I believe parsing in the simplest sense is just exploding a string into tokens and trying to make sense of it. This may be as simple as having a string be exploded into an array and being fed through a loop of operations, with a few storages (stacks, registers, etc.) to keep state and markers.
In this case, array operations likeforEach,map,reduce andfilter might be handy in place of loops. From a performance perspective, loops are faster, but from a readability standpoint, array methods are often shorter and more concise.
- \$\begingroup\$Thank you for taking the time to take a look at this. I appreciate it. I see what you mean, and yes setting an object property is slower than setting a variable I suppose. All makes sense. I'll implement those suggestions and post an update.\$\endgroup\$thephpdev– thephpdev2016-06-18 20:41:04 +00:00CommentedJun 18, 2016 at 20:41
- \$\begingroup\$Sorry, I meant to add: the
this.variablesobject's initial properties are there for a reason, they are the initial variables that are set and are read only, so that the variablepicannot be set to something other than 3.142..., as for the rand variable, I suppose that was a bit of a hack to avoid adding support for a function that doesn't take the right-hand-side token as it's argument. I should fix that. As for the bit about loops, In a lot of places the loop variables need to be modified, therefore one of those array operations won't do, and of course from a performance aspect.\$\endgroup\$thephpdev– thephpdev2016-06-18 21:05:49 +00:00CommentedJun 18, 2016 at 21:05 - \$\begingroup\$I have updated my question now.\$\endgroup\$thephpdev– thephpdev2016-06-18 21:23:26 +00:00CommentedJun 18, 2016 at 21:23
You mustlog in to answer this question.
Explore related questions
See similar questions with these tags.

