说明
每天10分钟,重构你的前端知识体系专栏笔记。
一、分析
按照编译原理相关的知识,将其分成几个步骤。
定义四则运算
:产出四则运算的词法定义和语法定义。词法分析
:把输入的字符串流变成 token。语法分析
:把 token 变成抽象语法树 AST。解释执行
:后序遍历 AST,执行得出结果。
二、定义四则运算
2.1、定义词法
- Token
- Number:
1 2 3 4 5 6 7 8 9 0
的组合 - Operator:
+、-、*、/
之一
- Whitespace:
- LineTerminator:
2.2、定义语法
语法定义多数采用 BNF。巴科斯范式(BNF: Backus-Naur Form
的缩写)是由 John Backus 和 Peter Naur 首次引入一种形式化符号来描述给定语言的语法(最早用于描述ALGOL 60 编程语言)。Javascript 标准里面就是一种跟 BNF 类似的自创语法。
1、加法是由若干个乘法再由加号或者减号连接成的:
<Expression> ::&#61;<AdditiveExpression><EOF><AdditiveExpression> ::&#61;<MultiplicativeExpression>|<AdditiveExpression><&#43;><MultiplicativeExpression>|<AdditiveExpression><-><MultiplicativeExpression>
2、可把普通数字当成乘法的一种特例&#xff1a;
<MultiplicativeExpression> ::&#61;<Number>|<MultiplicativeExpression><*><Number>|<MultiplicativeExpression></><Number>
上面就是四则运算的定义。
三、词法分析&#xff1a;状态机
词法分析&#xff1a;把字符流变成 token 流&#xff0c;有两种方案&#xff0c;一种是状态机&#xff0c;一种是正则表达式&#xff0c;它们是等效的。
3.1、实现状态机
var token &#61; [];
function start(char) {if(char &#61;&#61;&#61; &#39;1&#39; || char &#61;&#61;&#61; &#39;2&#39;|| char &#61;&#61;&#61; &#39;3&#39;|| char &#61;&#61;&#61; &#39;4&#39;|| char &#61;&#61;&#61; &#39;5&#39;|| char &#61;&#61;&#61; &#39;6&#39;|| char &#61;&#61;&#61; &#39;7&#39;|| char &#61;&#61;&#61; &#39;8&#39;|| char &#61;&#61;&#61; &#39;9&#39;|| char &#61;&#61;&#61; &#39;0&#39;) {token.push(char);return inNumber;}if(char &#61;&#61;&#61; &#39;&#43;&#39; || char &#61;&#61;&#61; &#39;-&#39; || char &#61;&#61;&#61; &#39;*&#39; || char &#61;&#61;&#61; &#39;/&#39;) {emmitToken(char, char);return start}if(char &#61;&#61;&#61; &#39; &#39;) {return start;}if(char &#61;&#61;&#61; &#39;\r&#39; || char &#61;&#61;&#61; &#39;\n&#39;) {return start;}
}
function inNumber(char) {if ( char &#61;&#61;&#61; &#39;1&#39; || char &#61;&#61;&#61; &#39;2&#39; || char &#61;&#61;&#61; &#39;3&#39;|| char &#61;&#61;&#61; &#39;4&#39;|| char &#61;&#61;&#61; &#39;5&#39; || char &#61;&#61;&#61; &#39;6&#39; || char &#61;&#61;&#61; &#39;7&#39;|| char &#61;&#61;&#61; &#39;8&#39; || char &#61;&#61;&#61; &#39;9&#39; || char &#61;&#61;&#61; &#39;0&#39;) {token.push(char);return inNumber;} else {emmitToken("Number", token.join(""));token &#61; [];return start(char);}
}
3.2、运行状态机
function emmitToken(type, value) {console.log(value);
}var input &#61; "1024 &#43; 2 * 256"var state &#61; start;for(var c of input.split(&#39;&#39;))state &#61; state(c);state(Symbol(&#39;EOF&#39;))
1024
&#43;
2
*
256
四、语法分析&#xff1a;LL
LL 语法分析根据每一个产生式来写一个函数。
4.1、写好函数名
function AdditiveExpression( ){}
function MultiplicativeExpression(){}
4.2、假设已经拿到 token
var tokens &#61; [{type:"Number",value: "1024"
}, {type:"&#43;",value: "&#43;"
}, {type:"Number",value: "2"
}, {type:"*",value: "*"
}, {type:"Number",value: "256"
}, {type:"EOF"
}];
4.3、AdditiveExpression处理
1、三种情况
<AdditiveExpression> ::&#61;<MultiplicativeExpression>|<AdditiveExpression><&#43;><MultiplicativeExpression>|<AdditiveExpression><-><MultiplicativeExpression>
2、AdditiveExpression 的写法
AdditiveExpression 的写法是根传入的节点&#xff0c;利用产生式合成新的节点。
function AdditiveExpression(source){if(source[0].type &#61;&#61;&#61; "MultiplicativeExpression") {let node &#61; {type:"AdditiveExpression",children:[source[0]]}source[0] &#61; node;return node;}if(source[0].type &#61;&#61;&#61; "AdditiveExpression" && source[1].type &#61;&#61;&#61; "&#43;") {let node &#61; {type:"AdditiveExpression",operator:"&#43;",children:[source.shift(), source.shift(), MultiplicativeExpression(source)]}source.unshift(node);}if(source[0].type &#61;&#61;&#61; "AdditiveExpression" && source[1].type &#61;&#61;&#61; "-") {let node &#61; {type:"AdditiveExpression",operator:"-",children:[source.shift(), source.shift(), MultiplicativeExpression(source)]}source.unshift(node);}
}
4.4、函数 Expression 处理
1、把解析好的 token 传给的顶层处理函数 Expression。
Expression(tokens);
2、如果 Expression 收到第一个 token&#xff0c;是个 Number&#xff0c;就需要对产生式的首项层层展开&#xff0c;根据所有可能性调用相应的处理函数&#xff0c;这个过程在编译原理中称为求 closure
。
function Expression(source){if(source[0].type &#61;&#61;&#61; "AdditiveExpression" && source[1] && source[1].type &#61;&#61;&#61; "EOF" ) {let node &#61; {type:"Expression",children:[source.shift(), source.shift()]}source.unshift(node);return node;}AdditiveExpression(source);return Expression(source);
}
function AdditiveExpression(source){if(source[0].type &#61;&#61;&#61; "MultiplicativeExpression") {let node &#61; {type:"AdditiveExpression",children:[source[0]]}source[0] &#61; node;return AdditiveExpression(source);}if(source[0].type &#61;&#61;&#61; "AdditiveExpression" && source[1] && source[1].type &#61;&#61;&#61; "&#43;") {let node &#61; {type:"AdditiveExpression",operator:"&#43;",children:[]}node.children.push(source.shift());node.children.push(source.shift());MultiplicativeExpression(source);node.children.push(source.shift());source.unshift(node);return AdditiveExpression(source);}if(source[0].type &#61;&#61;&#61; "AdditiveExpression" && source[1] && source[1].type &#61;&#61;&#61; "-") {let node &#61; {type:"AdditiveExpression",operator:"-",children:[]}node.children.push(source.shift());node.children.push(source.shift());MultiplicativeExpression(source);node.children.push(source.shift());source.unshift(node);return AdditiveExpression(source);}if(source[0].type &#61;&#61;&#61; "AdditiveExpression")return source[0];MultiplicativeExpression(source);return AdditiveExpression(source);
}
function MultiplicativeExpression(source){if(source[0].type &#61;&#61;&#61; "Number") {let node &#61; {type:"MultiplicativeExpression",children:[source[0]]}source[0] &#61; node;return MultiplicativeExpression(source);}if(source[0].type &#61;&#61;&#61; "MultiplicativeExpression" && source[1] && source[1].type &#61;&#61;&#61; "*") {let node &#61; {type:"MultiplicativeExpression",operator:"*",children:[]}node.children.push(source.shift());node.children.push(source.shift());node.children.push(source.shift());source.unshift(node);return MultiplicativeExpression(source);}if(source[0].type &#61;&#61;&#61; "MultiplicativeExpression"&& source[1] && source[1].type &#61;&#61;&#61; "/") {let node &#61; {type:"MultiplicativeExpression",operator:"/",children:[]}node.children.push(source.shift());node.children.push(source.shift());node.children.push(source.shift());source.unshift(node);return MultiplicativeExpression(source);}if(source[0].type &#61;&#61;&#61; "MultiplicativeExpression")return source[0];return MultiplicativeExpression(source);
};var source &#61; [{type:"Number",value: "3"
}, {type:"*",value: "*"
}, {type:"Number",value: "300"
}, {type:"&#43;",value: "&#43;"
}, {type:"Number",value: "2"
}, {type:"*",value: "*"
}, {type:"Number",value: "256"
}, {type:"EOF"
}];
var ast &#61; Expression(source);console.log(ast);
{type: "Expression",children: [{type: "AdditiveExpression",operator: "&#43;",children: [{type: "AdditiveExpression",children: Array(1)},{type: "&#43;",value: "&#43;"},{type: "MultiplicativeExpression",operator: "*",children: Array(3)}]},{type: "EOF"}]
}
五、解释执行
得到了 AST 之后&#xff0c;只需要对这个树做遍历操作执行即可。
function evaluate(node) {if(node.type &#61;&#61;&#61; "Expression") {return evaluate(node.children[0])}if(node.type &#61;&#61;&#61; "AdditiveExpression") {if(node.operator &#61;&#61;&#61; &#39;-&#39;) {return evaluate(node.children[0]) - evaluate(node.children[2]);}if(node.operator &#61;&#61;&#61; &#39;&#43;&#39;) {return evaluate(node.children[0]) &#43; evaluate(node.children[2]);}return evaluate(node.children[0])}if(node.type &#61;&#61;&#61; "MultiplicativeExpression") {if(node.operator &#61;&#61;&#61; &#39;*&#39;) {return evaluate(node.children[0]) * evaluate(node.children[2]);}if(node.operator &#61;&#61;&#61; &#39;/&#39;) {return evaluate(node.children[0]) / evaluate(node.children[2]);}return evaluate(node.children[0])}if(node.type &#61;&#61;&#61; "Number") {return Number(node.value);}
}