IT戦記

プログラミング、起業などについて書いているプログラマーのブログです😚

ちょっと yacc っぽいもの作ってみる

残り時間 1.5 時間
たぶん出来ない。

定義

終端は、 /^/ で囲んで、 非終端は文字列で表現する。

var y = {
  add: [
    ['mul'], 
    ['add', /^\+/, 'mul'], 
    ['add', /^-/, 'mul']
  ],
  mul: [
    ['una'], 
    ['mul', /^\*/, 'una'], 
    ['mul', /\//, 'una'] 
  ],
  una:[
    ['pri'], 
    [/^\+/, 'una'], 
    [/^-/, 'una']
  ],
  pri:
    [/^1/], 
    [/^\(/, 'add',  /^\)/] 
  ]
};

これから、状態遷移図を作る
あー。ハッシュだとどれがstart かわからんな><
やっぱり $accept は手動で入れることにする。 $end は /^$/ で表現する。

var y = {
    $accept: [
        ['add', /^$/]
    ],
    add: [
        ['mul'], 
        ['add', /^\+/, 'mul'], 
        ['add', /^-/, 'mul']
    ],
    mul: [
        ['una'], 
        ['mul', /^\*/, 'una'], 
        ['mul', /\//, 'una'] 
    ],
    una:[
        ['pri'], 
        [/^\+/, 'una'], 
        [/^-/, 'una']
    ],
    pri: [
        [/^1/], 
        [/^\(/, 'add',  /^\)/] 
    ]
};

終端、非終端を抽出

var terms = [];
var nterms = [];
var tSeen = {};
var nSeen = {};
for (var n in y) {

    y[n].forEach(function(rule) {
        rule.forEach(function(t) {
            if (t instanceof RegExp) {
                if (tSeen[t] != true) {
                    terms.push(t);
                    tSeen[t] = true;
                }
            }
            else {
                if (nSeen[t] != true) {
                    nterms.push(t);
                    nSeen[t] = true;
                }
            }
        });
    });
}
console.log(terms, nterms); // 終端、非終端

ルールオブジェクトを作る

function Rule(nterm, data) {
    this.nterm = nterm;
    this.data = data;
    this.first = data[0];
}

さっきの過程で、 Rule も生成するようにする。

var terms = [];
var nterms = [];
var tSeen = {};
var nSeen = {};

var ntermRuleMap = {};
var rules = [];

for (var n in y) {
    ntermRuleMap[n] = [];
    y[n].forEach(function(rule) {
        rule.forEach(function(t) {
            if (t instanceof RegExp) {
                if (tSeen[t] != true) {
                    terms.push(t);
                    tSeen[t] = true;
                }
            }
            else {
                if (nSeen[t] != true) {
                    nterms.push(t);
                    nSeen[t] = true;
                }
            }
        });
        var r = new Rule(n, rule);
        rules.push(r);
        ntermRuleMap[n].push(r);
    });
}
console.log(terms, nterms, rules, ntermRuleMap);

こんな感じ

非終端の最初に出現する非終端をまとめる

var ntermDeriveMap = {};

nterms.forEach(function(nterm) {
    ntermDeriveMap[nterm] = {};
    ntermRuleMap[nterm].forEach(function(rule) {
        if (!(rule.first instanceof RegExp)) {
            ntermDeriveMap[nterm][rule.first] = true;
        }
    });
});
console.log(ntermDeriveMap);

よしよし

推移閉包を求める

nterms.forEach(function(nterm0) {
    for (var nterm1 in ntermDeriveMap[nterm0]) {
        nterms.forEach(function(nterm2) {
            if (ntermDeriveMap[nterm2][nterm0]) {
                ntermDeriveMap[nterm2][nterm1] = true;
            }
        });
    }
});
console.log(ntermDeriveMap);

おk

ルールを網羅的にしたようなものを作る

var ntermDeriveRuleMap = {};
nterms.forEach(function(nterm) {
    ntermDeriveRuleMap[nterm] = [];
    rules.forEach(function(rule) {
        if (ntermDeriveMap[nterm][rule.nterm]) {
            ntermDeriveRuleMap[nterm].push(rule);
        }
    });
});
console.log(ntermDeriveRuleMap);

おk

あと 30 分
・・・ごくり

状態を作る

State オブジェクトを定義しておく

function State() {
    this.mainRulesData = [];
    this.reductions = [];
    this.rulesAndPositions = [];
    this.rulesAndPositionsSeen = {};
    this.termTransitions = {};
    this.ntermTransitions = {};
    this.rrConflict = false;
    this.srConflict = false;
    this.containReduction = false;
}
State.all = [];
State.hash = {};
State.prototype.add = function(rule, position) {
    if (this.rulesAndPositionsSeen[rule.id + '-' + position]) {
        return;
    }
    this.rulesAndPositionsSeen[rule.id + '-' + position] = true;
    this.mainRulesData.push(rule.id + '-' + position);
    if (rule.data.length == position) {
        if (this.containReduction) {
            this.rrConflict = true;
        }
        this.containReduction = true;
        this.reductions.push(rule);
    }
    else if (this.containReduction) {
        this.srConflict = true;
    }
    this.rulesAndPositions.push([rule, position]);
    if (rule.data[position] != undefined && !(rule.data[position] instanceof RegExp)) {
        var nterm = rule.data[position];
        var self = this;
        ntermDeriveRuleMap[nterm].forEach(function(rule) {
            if (!self.rulesAndPositionsSeen[rule.id + '-' + 0]) {
                self.rulesAndPositionsSeen[rule.id + '-' + 0] = true;
                self.rulesAndPositions.push([rule, 0]);
            }
        });
    }
};
State.prototype.createTransitions = function() {
    State.all.push(this);
    this.id = this.mainRulesData.sort().join(':');
    State.hash[this.id] = this;
    console.log(State.hash);
    var termMap = {};
    var ntermMap = {};
    this.rulesAndPositions.forEach(function(ruleAndPositions) {
        var rule = ruleAndPositions[0];
        var position = ruleAndPositions[1];
        if (rule.data.length > position) {
            if (rule.data[position] instanceof RegExp) {
                termMap[rule.data[position]] = termMap[rule.data[position]] ||  [];
                termMap[rule.data[position]].push([rule, position + 1]);
            }
            else {
                ntermMap[rule.data[position]] = ntermMap[rule.data[position]] ||  [];
                ntermMap[rule.data[position]].push([rule, position + 1]);
            }
        }
    });

    console.log(termMap, ntermMap);


    for (var n in termMap) {
        var state = State.hash[termMap[n].map(function(rp) { return rp[0].id + '-' + rp[1] }).sort().join(':')];
        if (!state) {
            state = new State();
            var rps = termMap[n];
            rps.forEach(function(rp) {
                state.add(rp[0], rp[1]);
            });
            state.createTransitions();
        }
        this.termTransitions[n] = state;
    }
    for (var n in ntermMap) {
        var state = State.hash[ntermMap[n].map(function(rp) { return rp[0].id + '-' + rp[1] }).sort().join(':')];
        if (!state) {
            state = new State();
            var rps = ntermMap[n];
            rps.forEach(function(rp) {
                state.add(rp[0], rp[1]);
            });
            state.createTransitions();
        }
        this.ntermTransitions[n] = state;
    }
};

これで

var firstRule = ntermRuleMap['$accept'][0];
var firstState = new State();

firstState.add(firstRule, 0);
firstState.createTransitions();
console.log(firstState);

とすれば、再帰的にすべての状態が作られるはず

console.log(State.all.length); // 22

ちゃんと 22 個の状態が出来た

ここで時間だ

先読み終端の検出は後で

ここまでの全ソース

<script >
var y = {
    $accept: [
        ['add', /^$/]
    ],
    add: [
        ['mul'], 
        ['add', /^\+/, 'mul'], 
        ['add', /^-/, 'mul']
    ],
    mul: [
        ['una'], 
        ['mul', /^\*/, 'una'], 
        ['mul', /\//, 'una'] 
    ],
    una:[
        ['pri'], 
        [/^\+/, 'una'], 
        [/^-/, 'una']
    ],
    pri: [
        [/^1/], 
        [/^\(/, 'add',  /^\)/] 
    ]
};


function Rule(nterm, data) {
    this.nterm = nterm;
    this.data = data;
    this.first = data[0];
    this.id = Rule.id++;
}
Rule.id = 0;

function State() {
    this.mainRulesData = [];
    this.reductions = [];
    this.rulesAndPositions = [];
    this.rulesAndPositionsSeen = {};
    this.termTransitions = {};
    this.ntermTransitions = {};
    this.rrConflict = false;
    this.srConflict = false;
    this.containReduction = false;
}
State.all = [];
State.hash = {};
State.prototype.add = function(rule, position) {
    if (this.rulesAndPositionsSeen[rule.id + '-' + position]) {
        return;
    }
    this.rulesAndPositionsSeen[rule.id + '-' + position] = true;
    this.mainRulesData.push(rule.id + '-' + position);
    if (rule.data.length == position) {
        if (this.containReduction) {
            this.rrConflict = true;
        }
        this.containReduction = true;
        this.reductions.push(rule);
    }
    else if (this.containReduction) {
        this.srConflict = true;
    }
    this.rulesAndPositions.push([rule, position]);
    if (rule.data[position] != undefined && !(rule.data[position] instanceof RegExp)) {
        var nterm = rule.data[position];
        var self = this;
        ntermDeriveRuleMap[nterm].forEach(function(rule) {
            if (!self.rulesAndPositionsSeen[rule.id + '-' + 0]) {
                self.rulesAndPositionsSeen[rule.id + '-' + 0] = true;
                self.rulesAndPositions.push([rule, 0]);
            }
        });
    }
};
State.prototype.createTransitions = function() {
    State.all.push(this);
    this.id = this.mainRulesData.sort().join(':');
    State.hash[this.id] = this;
    console.log(State.hash);
    var termMap = {};
    var ntermMap = {};
    this.rulesAndPositions.forEach(function(ruleAndPositions) {
        var rule = ruleAndPositions[0];
        var position = ruleAndPositions[1];
        if (rule.data.length > position) {
            if (rule.data[position] instanceof RegExp) {
                termMap[rule.data[position]] = termMap[rule.data[position]] ||  [];
                termMap[rule.data[position]].push([rule, position + 1]);
            }
            else {
                ntermMap[rule.data[position]] = ntermMap[rule.data[position]] ||  [];
                ntermMap[rule.data[position]].push([rule, position + 1]);
            }
        }
    });

    console.log(termMap, ntermMap);


    for (var n in termMap) {
        var state = State.hash[termMap[n].map(function(rp) { return rp[0].id + '-' + rp[1] }).sort().join(':')];
        if (!state) {
            state = new State();
            var rps = termMap[n];
            rps.forEach(function(rp) {
                state.add(rp[0], rp[1]);
            });
            state.createTransitions();
        }
        this.termTransitions[n] = state;
    }
    for (var n in ntermMap) {
        var state = State.hash[ntermMap[n].map(function(rp) { return rp[0].id + '-' + rp[1] }).sort().join(':')];
        if (!state) {
            state = new State();
            var rps = ntermMap[n];
            rps.forEach(function(rp) {
                state.add(rp[0], rp[1]);
            });
            state.createTransitions();
        }
        this.ntermTransitions[n] = state;
    }
};

var terms = [];
var nterms = [];
var tSeen = {};
var nSeen = {};

var ntermRuleMap = {};
var rules = [];

for (var n in y) {
    if (nSeen[n] != true) {
        nterms.push(n);
        nSeen[n] = true;
    }
    ntermRuleMap[n] = [];
    y[n].forEach(function(rule) {
        rule.forEach(function(t) {
            if (t instanceof RegExp) {
                if (tSeen[t] != true) {
                    terms.push(t);
                    tSeen[t] = true;
                }
            }
        });
        var r = new Rule(n, rule);
        rules.push(r);
        ntermRuleMap[n].push(r);
    });
}
console.log(terms, nterms, rules, ntermRuleMap);

var ntermDeriveMap = {};

nterms.forEach(function(nterm) {
    ntermDeriveMap[nterm] = {};
    ntermRuleMap[nterm].forEach(function(rule) {
        if (!(rule.first instanceof RegExp)) {
            ntermDeriveMap[nterm][rule.first] = true;
        }
    });
});
console.log(ntermDeriveMap);

nterms.forEach(function(nterm0) {
    for (var nterm1 in ntermDeriveMap[nterm0]) {
        nterms.forEach(function(nterm2) {
            if (ntermDeriveMap[nterm2][nterm0]) {
                ntermDeriveMap[nterm2][nterm1] = true;
            }
        });
    }
    ntermDeriveMap[nterm0][nterm0] = true;
});
console.log(ntermDeriveMap);


var ntermDeriveRuleMap = {};
nterms.forEach(function(nterm) {
    ntermDeriveRuleMap[nterm] = [];
    rules.forEach(function(rule) {
        if (ntermDeriveMap[nterm][rule.nterm]) {
            ntermDeriveRuleMap[nterm].push(rule);
        }
    });
});
console.log(ntermDeriveRuleMap);

var firstRule = ntermRuleMap['$accept'][0];
var firstState = new State();

firstState.add(firstRule, 0);
firstState.createTransitions();
console.log(firstState);
console.log(State.all.length);

</script>

続き

トークンの先読み。

State.all.forEach(function(state0) {
    if (state0.srConflict || state0.ssConflict) {
        State.all.forEach(function(state1) {
            state0.reductions.forEach(function(red) {
                if (state1.ntermTransitions[red[0].nterm]) {
                    for (var n in state1.ntermTransitions[red[0].nterm].termTransitions) {
                        if (!red[2][n]) {
                            red[1].push(eval(n)); // dirty >_<
                            red[2][n] = true;
                        }
                    }
                }
            });
        });
    }
});

console.log(firstState);

できてるっぽい