最近尝试做了文法分析的东东,问题较多。
请提建议。代码放不下,分两页。
下载地址 http://download.csdn.net/detail/xuzuning/4529066
include 'ttrie.php'; class Rule extends TTrie { public $rule = array(); public $savematch = 0; function __construct($s='') { $this->set( array( ' ' => 'Separated', "\r\n" => 'set_rule', "\n" => 'set_rule', "\t" => 'Separated', '->' => 'Separated', '→' => 'Separated', '|' => 'set_parallel_rule', )); $this->match($s); if($this->rule[0][0] == $this->rule[0][1]) { if(count($this->rule[0]) == 2) $this->rule[0][0] .= "'"; else array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0])); }else { $c = $this->rule[0][0]; $n = 0; foreach($this->rule as $r) if($r[0] == $c) $n++; if($n > 1) array_unshift($this->rule, array($this->rule[0][0]."'", $this->rule[0][0])); } } function Separated() { } function set_rule() { $this->rule[] = $this->buffer; $this->buffer = array(); } function set_parallel_rule() { $t = $this->buffer[0]; $this->set_rule(); $this->buffer[] = $t; } } class Grammar { var $closure = array(); var $first = array(); var $follow = array(); var $rule = array(); var $identifier = array(); var $leay = array(); var $forecast = array(); var $stack = array(); var $ll = 'LL(0)'; var $lr = 'LR(0)'; function __construct($s='') { $p = new Rule($s); $this->rule = $p->rule; $this->set_grammar(); } function set_grammar() { foreach($this->rule as $rule) { if(! in_array($rule[0], $this->identifier)) $this->identifier[] = $rule[0]; } foreach($this->rule as $rule) { foreach($rule as $v) if(! in_array($v, $this->identifier) && ! in_array($v, $this->leay)) $this->leay[] = $v; } $this->set_first(); $this->set_follow(); $this->set_closure(); $this->set_select(); $this->set_forecast(); } function set_first() { foreach($this->rule as $rule) $this->first[$rule[0]] = array(); //直接收取 形如U->a…的产生式(其中a是终结符),把a收入到First(U)中 foreach($this->rule as $v) { if(in_array($v[1], $this->leay)) $this->first[$v[0]][] = $v[1]; } //反复传递 形入U->P1P2P3…Pn的产生式(其中P是非终结符),应先把First(P1)中的全部内容传送到First(U)中,如果P1中有ε,把First(P2)中的内容传送到First(U)中,类推直到Pi中无ε do { $t = serialize($this->first); foreach($this->rule as $rule) { for($i=1; $iidentifier)) { $this->first[$rule[0]] = array_unique(array_merge($this->first[$rule[0]], $this->first[$v])); if(! in_array('#', $this->first[$v])) break; }else break; } } }while($t != serialize($this->first)); } function set_follow() { foreach($this->rule as $rule) $this->follow[$rule[0]] = array(); //直接收取 形如 …Ua… 的,把a直接收入到Follow(U)中 foreach($this->rule as $rule) { for($i=1; $i identifier) && in_array($rule[$i+1], $this->leay)) $this->follow[$rule[$i]][] = $rule[$i+1]; } if(in_array($rule[$i], $this->identifier)) $this->follow[$rule[$i]][] = '#'; } foreach($this->follow as &$v) if(! $v) $v[] = '#'; //直接收取 形如 …UP…(P是非终结符)的,把First(P)中非ε收入到Follow(U)中 foreach($this->rule as $rule) { for($i=1; $i identifier) && in_array($rule[$i+1], $this->identifier)) { $this->follow[$rule[$i]] = array_unique(array_merge($this->follow[$rule[$i]], array_diff($this->first[$rule[$i+1]], array('#')))); } } } //反复传递 形如U->aP的(P是非终结符)或U->aPQ(P,Q为非终结符且Q中含ε),应把Follow(U)中的全部内容传送到Follow(P)中 do { $t = serialize($this->follow); foreach($this->rule as $rule) { $s = $rule[0]; $d = end($rule); if(in_array($d, $this->leay)) continue; $p = prev($rule); if(in_array($p, $this->leay)) $this->follow[$d] = array_unique(array_merge($this->follow[$d], $this->follow[$s])); elseif(in_array('#', $this->follow[$d])) $this->follow[$p] = array_unique(array_merge($this->follow[$p], $this->follow[$s])); } }while($t != serialize($this->follow)); } function set_closure() { $shift = array(); $this->closure[0][] = array('offs' => 1, 'rule' => 0); for($i=0 ; $i closure); $i++) { $cnt = count($this->closure); //构造闭包 closure $ex = array(); $j = 0; $tmp = array(); do { $size = count($this->closure[$i]); for($j=0; $j closure[$i]); $j++) { $dfa = $this->closure[$i][$j]; $rule = $this->rule[$dfa['rule']]; if(isset($rule[$dfa['offs']])) { $ch = $ex[] = $rule[$dfa['offs']]; } foreach($this->rule as $r=>$rule) { if(in_array($rule[0], $ex)) { $t = array('offs' => 1, 'rule' => $r); if(!isset($tmp[$r][1])) $this->closure[$i][] = $t; $tmp[$r][1] = 1; } } } }while(count($this->closure[$i]) != $size); //直到不再增大 //判断状态转向 go $out = array(); foreach($this->closure[$i] as $k=>$dfa) { $rule = $this->rule[$dfa['rule']]; if(isset($rule[$dfa['offs']])) { $t = "$dfa[rule],$dfa[offs]"; $ch = $rule[$dfa['offs']]; $this->closure[$i][$k]['char'] = $ch; if(isset($out[$ch])) $shift[$t] = $out[$ch]; if(isset($shift[$t])) { $this->closure[$i][$k]['target'] = $shift[$t]; $dfa['offs']++; if(!$this->in_closure($dfa, $this->closure[$shift[$t]])) $this->closure[$shift[$t]][] = $dfa; } else { $cnt = count($this->closure); $this->closure[$i][$k]['target'] = $cnt; $shift[$t] = $cnt; $dfa['offs']++; $this->closure[count($this->closure)][] = $dfa; $out[$ch] = $cnt; } } } //构造状态转换表 foreach($this->closure[$i] as $k=>$dfa) { if(isset($dfa['target'])) { $v = $dfa['char']; if(in_array($v, $this->identifier)) $this->goto[$i][$v] = $dfa['target']; else { $this->action[$i][$v][] = "S$dfa[target]"; $this->request[$i][$v] = $dfa['rule']; } } else { $ch = $this->rule[$dfa['rule']][0]; foreach($this->follow[$ch] as $v) { $this->action[$i][$v][] = "r$dfa[rule]"; $this->request[$i][$v] = $dfa['rule']; } } } foreach($this->action[$i] as $c=>$v) { $v = array_unique($v); if(count($v) > 1) $this->lr = 'SLR(1)'; $this->action[$i][$c] = $v; } } } function in_closure($t, $s) { foreach($s as $r) if($t['offs'] == $r['offs'] && $t['rule'] == $r['rule']) return true; return false; return in_array(serialize($t), array_map('serialize', $s)); } function set_select() { foreach($this->rule as $i=>$rule) { $y = array($rule[1]); if(in_array($y[0], $this->leay)) { if($y[0] != '#') { $this->select[$i] = $y; continue; } }else $y = $this->first[$rule[1]]; $x = $this->follow[$rule[0]]; //SELECT(X->Y)=(FIRST(Y)-{ε})并FOLLOW(X) $this->select[$i] = array_unique( array_merge(array_diff($y, array('#')), $x) ); } } /** * 构造预测分析表 **/ function set_forecast() { foreach($this->select as $i=>$r) { $c = $this->rule[$i][0]; $v = array_reverse(array_slice($this->rule[$i], 1)); foreach($r as $k) { $this->forecast[$c][$k][] = $v; } } //检查冲突 foreach($this->forecast as $c=>$r) { foreach($r as $k) { if(count($k) > 1) { $this->ll = 'LL(1)'; } } } }
续
function ll_start($s) { $t = array(); foreach($this->rule as $rule) if($rule[0] == $rule[1]) $t[] = $rule; if($t) { foreach($t as $rule) printf('', preg_replace('/ /', ' → ', join(' ', $rule), 1)); return; } $stack = array('#', key($this->forecast)); $i = 0; $step = 1; $timeout = 10 * strlen($s); while($stack && $i %s 存在左递归 forecast[$r][$s{$i}])) $msg = $r . ' → ' . join(' ', array_reverse($this->forecast[$r][$s{$i}][0])); else $msg = '错误'; printf(" ", $step++, substr(join('', $stack), -50), substr($s, $i), $msg); if($r == $s{$i}) { array_pop($stack); $i++; }elseif(isset($this->forecast[$r][$s{$i}])) { array_pop($stack); if(current($this->forecast[$r][$s{$i}][0]) != '#') $stack = array_merge($stack, $this->forecast[$r][$s{$i}][0]); }else break; } } function lr_start($s) { $State = array(0); //状态栈 $Symbol = array('#'); //符号栈 $i = 0; $step = 1; $timeout = 10 * strlen($s); while($i %d %s %s %s action[$sp][$ch]) && $this->action[$sp][$ch][0] == 'r0') $msg = 'acc'; if(isset($this->request[$sp][$ch])) $request = preg_replace('/ /', ' → ', join(' ', $this->rule[$this->request[$sp][$ch]]), 1); else $request = 'error'; printf(" ", $step++, substr(join('', $State), -50), join('', $Symbol), $msg, $request); if(isset($this->action[$sp][$ch]) || isset($this->goto[$sp][$ch])) { $t = isset($this->action[$sp][$ch]) ? $this->action[$sp][$ch][0] : $this->goto[$sp][$ch]; $n = substr($t, 1) + 0; if($t{0} == 'r') { for($j=0; $j %d %s %s %s %s rule[$n])-1; $j++) { array_pop($State); array_pop($Symbol); } if($n == 0) break; $c = $Symbol[] = $this->rule[$n][0]; $State[] = $this->goto[end($State)][$c]; }elseif($t{0} == 'S') { $State[] = $n; $Symbol[] = $ch; $i++; }else ; }else break; } } function report($in='') { if($in) $in = trim($in, '#') . '#'; echo ''; echo ' '; $identifier = $this->identifier; echo '
'; foreach($this->rule as $rule) { echo ' 文法 '; } echo ' '; echo preg_replace('/ /', ' → ', join(' ', $rule), 1); echo ' '; $leay = array_diff($this->leay, array('#')); $leay[] = '#'; echo '
'; echo ' 标识符 '; echo '' . join(' ', $identifier) . ' '; echo '
'; echo ' 终结符 '; echo '' . join(' ', $leay) . ' '; echo '
'; foreach($identifier as $ch) { echo ' 标识符 推出空 FIRST集 FOLLOW集 '; } echo ' ' . $ch . ' '; echo '' . (in_array('#', $this->first[$ch]) ? 'True' : 'false') . ' '; echo '' . join(' ', $this->first[$ch]) . ' '; echo '' . join(' ', $this->follow[$ch]) . ' '; echo ''; echo '
' . $this->ll . '文法分析 '; $forecast = $this->forecast; echo '
'; foreach($this->rule as $i=>$rule) { echo ' 产生式 Select集 '; echo ' '; } echo '' . preg_replace('/ /', ' → ', join(' ', $rule), 1) . ' '; echo '' . join(' ', $this->select[$i]) . ' '; echo ''; if($in) { echo '
'; echo ' 预测分析表 '; echo ' '; echo '
'; foreach($identifier as $ch) { echo ' ' . join(' ', $leay) . ' ' . $ch . ' '; foreach($leay as $v) { $s = ''; if(isset($forecast[$ch][$v])) { foreach($forecast[$ch][$v] as $t) { if($s) $s .= '
'; $s .= $ch . ' → '. join(' ', array_reverse($t)); } if(count($forecast[$ch][$v]) > 1) $s = "$s"; }else $s .= ' '; echo '' . $s . ' '; } echo ''; } echo ' '; echo '
测试字符串 '; echo '' . trim($in, '#') . ' '; echo '
'; } echo ''; $this->ll_start($in); echo ' 步骤 分析栈 剩余字符 生成式或匹配 '; echo '
'; echo ' ' . $this->lr . '文法分析 '; echo '
状态转移表 '; echo '
'; if($in) { echo ''; echo ' 状态 Action Goto DFA '; foreach($this->action as $i=>$item) { echo " ' . join(' ', $leay) . ' '. join(' ', $identifier) . ' '; } echo ' $i "; foreach($leay as $v) { $s = isset($item[$v]) ? join(',', $item[$v]) : ' '; if(strpos($s, ',')) $s = "$s"; echo "$s "; } foreach($identifier as $v) { $s = isset($this->goto[$i][$v]) ? $this->goto[$i][$v] : ' '; echo "$s "; } echo '' . $this->showDFA($i) .' '; echo ''; echo '
测试字符串 '; echo '' . trim($in, '#') . ' '; echo '
&#39;; } } function showDFA($i) { $res = array(); foreach($this->closure[$i] as $dfa) { $rule = $this->rule[$dfa[&#39;rule&#39;]]; array_splice($rule, $dfa[&#39;offs&#39;], 0, &#39;·&#39;); array_splice($rule, 1, 0, &#39;→&#39;); if(isset($dfa[&#39;target&#39;])) $rule[] = " [$dfa[target]]"; $res[] = join(&#39;&#39;, $rule); } return join(&#39;; &#39;, $res); } } for($i=1; $i<=count(glob(&#39;*.txt&#39;)); $i++) echo " $i"; $n = current($_GET) or $n = 1; $S = &#39;&#39;; include "$n.txt"; $p = new grammar($G); $p->report($S);&#39;; $this->lr_start($in); echo &#39; 步骤 状态栈 符号栈 剩余字符 生成式
ttrie.php
$v) $this->set($k, $v); return; } $p = count($this->dict); $cur = 0; //当前节点号 foreach(str_split($word) as $c) { if (isset($this->dict[$cur][$c])) { //已存在就下移 $cur = $this->dict[$cur][$c]; continue; } $this->dict[$p]= array(); //创建新节点 $this->dict[$cur][$c] = $p; //在父节点记录子节点号 $cur = $p; //把当前节点设为新插入的 $p++; } $this->dict[$cur][&#39;acc&#39;] = $action; //一个词结束,标记叶子节点 } function match($s) { $ret = array(); $cur = 0; //当前节点,初始为根节点 $i =& $this->input; //字符串当前偏移 $p =& $this->backtracking; //字符串回溯位置 $s .= "\0"; //附加结束符 $len = strlen($s); $buf = &#39;&#39;; while($i <$len) { $c = $s{$i}; if(isset($this->dict[$cur][$c])) { //如果存在 $cur = $this->dict[$cur][$c]; //转到对应的位置 if(isset($this->dict[$cur][$s[$i+1]])) {//检查下一个字符是否也能匹配,长度优先 $i++; continue; } if(isset($this->dict[$cur][&#39;acc&#39;])) { //是叶子节点,单词匹配! if($buf) { $this->buffer[] = $buf; $buf = &#39;&#39;; } if($this->savematch) $this->buffer[] = substr($s, $p, $i - $p + 1); //取出匹配位置和匹配的词 $ar = explode(&#39;,&#39;, $this->dict[$cur][&#39;acc&#39;]); call_user_func_array( array($this, array_shift($ar)), $ar ); $p = $i + 1; //设置下一个回溯位置 $cur = 0; //重置当前节点为根节点 } } else { //不匹配 $buf .= $s{$p}; //substr($s, $p, $i - $p + 1); //保存未匹配位置和未匹配的内容 $cur = 0; //重置当前节点为根节点 $i = $p; //把当前偏移设为回溯位置 $p = $i + 1; //设置下一个回溯位置 } $i++; //下一个字符 } if(trim($buf, "\0")) $this->buffer[] = trim($buf, "\0"); } function __call($method, $param) { if($this->debug) printf("偏移:%d 回溯:%d\n", $this->input, $this->backtracking); } }