経過をここに晒していくよ!
ヽ(゚∀゚)ノウンコー!
lexer を考える
ここを参考に考えるよ!
今回作るやつは
- Unicode は考えない!
- オフサイドルールなし!
- 8 進数なし!
- 16 進数なし!
- 指数(0.23e10 みたいなやつ)なし!
- 文字列内の \ で改行を無効にするやつなし!
- コメントなし!(ちょw
- 全部正規表現使うよ!
工エエェェ(´д`)ェェエエ工
気にしない!
正規表現を考えるよ!
/**
 * Regular expressions for the lexical classes of the Haskell subset handled
 * by this lexer (no Unicode, no layout rule, no octal/hex/exponent literals,
 * no comments).
 *
 * Every pattern is unanchored; callers add `^...$` or the `g` flag as needed.
 * The symbol character class is written with `-` as its LAST member so that
 * it is a literal minus sign. Writing it as `|-~` creates a range that
 * matches `}` and does not match `-`.
 */
var regexps = {
  // --- numbers ---
  digit: /\d/,
  decimal: /\d+/,
  float: /\d+\.\d+/,
  integer: /\d+/,

  // --- character and string literals (backslash escapes: letter or decimal code) ---
  char: /'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'/,
  string: /"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"/,

  // --- single characters ---
  symbol: /[!#$%&*+.\/<=>?@^|~\\-]/,
  small: /[a-z_]/,
  large: /[A-Z]/,

  // --- unqualified names and operators ---
  varid: /[a-z_]\w*/,
  varsym: /[!#$%&*+.\/<=>?@^|~\\-]+/,
  conid: /[A-Z]\w*/,
  consym: /:[!#$%&*+.\/<=>?@^|~\\-]+:/,

  // --- punctuation and reserved operators ---
  special: /\(|\)|,|;|\[|\]|`|{|}/,
  reservedop: /=>|\.\.|<-|->|::|:|=|\\|\||@|~/,

  // --- any literal: number, char or string ---
  literal: /\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"/,

  // --- names with an optional `Module.` qualifier ---
  'qvarid or reservedid': /(?:[A-Z]\w*\.)?[a-z_]\w*/,
  qconid: /(?:[A-Z]\w*\.)?[A-Z]\w*/,
  qvarsym: /(?:[A-Z]\w*\.)?[!#$%&*+.\/<=>?@^|~\\-]+/,
  qconsym: /(?:[A-Z]\w*\.)?:[!#$%&*+.\/<=>?@^|~\\-]+:/,

  // --- merged alternatives used to assemble the final tokenizer ---
  'qvarid or reservedid or qconid': /(?:[A-Z]\w*\.)?[A-Za-z_]\w*/,
  'qvarsym or qconsym': /(?:[A-Z]\w*\.)?(?::[!#$%&*+.\/<=>?@^|~\\-]+:|[!#$%&*+.\/<=>?@^|~\\-]+)/,
  'qvarsym or qconsym or reservedop': /=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?(?::[!#$%&*+.\/<=>?@^|~\\-]+:|[!#$%&*+.\/<=>?@^|~\\-]+)|:/,

  // --- one token of a program; the trailing `.` catches any other single character ---
  program: /\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+.\/<=>?@^|~\\-]+:|[!#$%&*+.\/<=>?@^|~\\-]+)|./,
};
できたー゚+.(・∀・)゚+.゚
// Put the Haskell source text to tokenize into `source`.
var source = "";

// Split `source` into an array of token strings (whitespace characters come
// out as one-character tokens). `String.prototype.match` returns null when
// nothing matches (e.g. for an empty string), so fall back to an empty array.
// In the symbol class `-` is placed last so it is a literal minus sign.
var tokens = source.match(/\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+.\/<=>?@^|~\\-]+:|[!#$%&*+.\/<=>?@^|~\\-]+)|./g) || [];
今回は kmyacc を使うから yylex を作るよ
// Queue of token strings still to be handed to the parser.
var tokens;

// Text of the token most recently returned by yylex() (yacc's semantic value).
// Declared here so the lexer also runs stand-alone; a second `var yylval` in
// the generated parser would be a harmless redeclaration.
var yylval;

/**
 * Tokenizes Haskell source text and stores the result in `tokens`.
 *
 * Tokens that consist only of whitespace are dropped. In the symbol character
 * class `-` is placed last so it is a literal minus sign rather than part of
 * a range.
 *
 * @param {string} source - Haskell source code.
 */
function prepareTokens(source) {
  var found = source.match(/\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?(?:[A-Za-z_]\w*|:[!#$%&*+.\/<=>?@^|~\\-]+:|[!#$%&*+.\/<=>?@^|~\\-]+)|./g);

  // match() yields null when nothing matched (e.g. empty input).
  // A RegExp is not callable, so filter() needs a real predicate function.
  tokens = (found || []).filter(function (text) {
    return /[^\s]/.test(text);
  });
}

/**
 * Returns the token code of the next token for the kmyacc-generated parser
 * and stores the token text in `yylval`.
 *
 * @returns {number} 0 at end of input, a %token constant for keywords,
 *   literals, identifiers and multi-character reserved operators, or the
 *   character code of the first character for any other token.
 */
function yylex() {
  var token = tokens.shift();
  yylval = token;
  if (token === undefined) return 0;

  // keywords
  switch (token) {
    case "infixl": return INFIXL;
    case "infixr": return INFIXR;
    case "infix": return INFIXN;
    case "instance": return TINSTANCE;
    case "class": return TCLASS;
    case "primitive": return PRIMITIVE;
    case "case": return CASEXP;
    case "of": return OF;
    case "if": return IF;
    case "then": return THEN;
    case "else": return ELSE;
    case "where": return WHERE;
    case "type": return TYPE;
    case "data": return DATA;
    case "newtype": return TNEWTYPE;
    case "let": return LET;
    case "in": return IN;
    case "do": return DO;
    case "deriving": return DERIVING;
    case "default": return DEFAULT;
    case "import": return IMPORT;
    case "module": return TMODULE;
    case "hiding": return HIDING;
    case "qualified": return QUALIFIED;
    case "as": return ASMOD;
    // The grammar refers to the wildcard as the character token '_'.
    case "_": return token.charCodeAt(0);
  }

  // Reserved operators. These must be tested BEFORE the generic operator
  // patterns below, which would otherwise classify them as VAROP.
  switch (token) {
    case "::": return COCO;
    case "..": return UPTO;
    case "->": return ARROW;
    case "<-": return FROM;
    case "=>": return IMPLIES;
    // Operators the grammar refers to as single-character tokens.
    case "=":
    case "@":
    case "\\":
    case "|":
    case "~":
    case "-":
    case "+":
    case "!":
    case ".":
      return token.charCodeAt(0);
  }

  // literals
  if (/^\d+(?:\.\d+)?$/.test(token)) return NUMLIT;
  if (/^'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'$/.test(token)) return CHARLIT;
  if (/^"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"$/.test(token)) return STRINGLIT;

  // unqualified variables and constructors
  if (/^[!#$%&*+.\/<=>?@^|~\\-]+$/.test(token)) return VAROP;
  if (/^[a-z_]\w*$/.test(token)) return VARID;
  if (/^:[!#$%&*+.\/<=>?@^|~\\-]+:$/.test(token)) return CONOP;
  if (/^[A-Z]\w*$/.test(token)) return CONID;

  // qualified variables and constructors
  if (/^(?:[A-Z]\w*\.)?[!#$%&*+.\/<=>?@^|~\\-]+$/.test(token)) return QVAROP;
  if (/^(?:[A-Z]\w*\.)?[a-z_]\w*$/.test(token)) return QVARID;
  if (/^(?:[A-Z]\w*\.)?:[!#$%&*+.\/<=>?@^|~\\-]+:$/.test(token)) return QCONOP;
  if (/^(?:[A-Z]\w*\.)?[A-Z]\w*$/.test(token)) return QCONID;

  // Anything else (brackets, braces, comma, semicolon, backquote, ...) is
  // passed through as its character code.
  return token.charCodeAt(0);
}
もう正規表現書きたくない(゚∀゚)=3ムッハー
parser を考える
Ψ(゚д゚)Ψ「あ」<(゚д゚)>「い」(q゚з゚)p「う」∠( ゚д゚)/「え」(┌゚Д゚)┌「お」
kmyacc 使うよ
%token を定義するよ
Hugs のソースから持って来たよ
/* Terminal symbols. The declaration order is kept as is, because it determines the token numbers. */
/* keywords */
%token CASEXP OF DATA TYPE IF
%token THEN ELSE WHERE LET IN
%token INFIXN INFIXL INFIXR PRIMITIVE TNEWTYPE
%token DEFAULT DERIVING DO TCLASS TINSTANCE
/* literals */
%token NUMLIT CHARLIT STRINGLIT
/* identifiers and operators, unqualified and qualified */
%token VAROP VARID CONOP CONID
%token QVAROP QVARID QCONOP QCONID
/* reserved operators and punctuation */
%token COCO '=' UPTO '@' '\\'
%token '|' '-' FROM ARROW '~'
%token '!' IMPLIES '(' ',' ')'
%token '[' ';' ']' '`' '.'
/* module system keywords */
%token TMODULE IMPORT HIDING QUALIFIED ASMOD
yacc のルールを書いていくよくるよ
/*
 * Grammar rules (yacc syntax) for the Haskell subset: module header, export
 * and import lists, top-level declarations, types, declarations, patterns
 * and expressions. Upper-case names are terminals declared with %token;
 * quoted characters are single-character tokens.
 *
 * NOTE(review): points that look unfinished in this section -- confirm
 * before running the parser generator:
 *  - `polyType:` is written twice in a row, and `bpolyType` (used by `type1`)
 *    has no rule here; the second `polyType :` may have been meant to be
 *    `bpolyType :`.
 *  - `topDecls` (used by `modBody`) and `tfields` (used by `atype1`) have no
 *    rule in this section; only `topDecl` is defined.
 *  - `appExp` is defined but no other rule refers to it, so `exp10a` and
 *    `exp10b` cannot derive a plain application or atom.
 *  - `varids` and `lwherePart` are defined but not referenced.
 *  - The rules sit on a few very long lines; if `//` is treated as a
 *    line comment, each `// empty` marker hides the rest of its line.
 */
start: module | modBody ; module: TMODULE modname expspec WHERE '{' modBody '}' ; modname: qconid ; modid: qconid | STRINGLIT ; modBody: // empty | ';' modBody | topDecls | impDecls | impDecls ';' topDecls ; expspec: // empty | '(' ')' | '(' ',' ')' | '(' exports ')' | '(' exports ',' ')' ; exports: exports ',' export | export ; export: qvar | qcon | qconid '(' UPTO ')' | qconid '(' qnames ')' | TMODULE modid ; qnames: // empty | ',' | qnames1 | qnames1 ',' ; qnames1: qnames1 ',' qname | qname ; qname: qvar | qcon ; impDecls: impDecls ';' impDecl | impDecls ';' | impDecl ; impDecl: IMPORT modid impspec | IMPORT modid ASMOD modid impspec | IMPORT QUALIFIED modid ASMOD modid impspec | IMPORT QUALIFIED modid impspec ; impspec: // empty | HIDING '(' imports ')' | '(' imports ')' ; imports: // empty | ',' | imports1 | imports1 ',' ; imports1: imports1 ',' import | import ; import: var | CONID | CONID '(' UPTO ')' | CONID '(' names ')' ; names: // empty | ',' | names1 | names1 ',' ; names1: names1 ',' name | name ; name: var | con ; topDecl: TYPE tyLhs '=' type | TYPE tyLhs '=' type IN invars | DATA btype2 '=' constrs deriving | DATA context IMPLIES tyLhs '=' constrs deriving | DATA btype2 | DATA context IMPLIES tyLhs | TNEWTYPE btype2 '=' constr deriving | TNEWTYPE context IMPLIES tyLhs '=' constr deriving | PRIMITIVE prims COCO topType | TCLASS crule fds wherePart | TINSTANCE irule wherePart | DEFAULT '(' dtypes ')' ; tyLhs: tyLhs varid | CONID ; invars: invars ',' invar | invar ; invar: var COCO topType | var ; constrs: constrs '|' constr | constr ; constr: '!' btype conop bbtype | btype1 conop bbtype | btype2 conop bbtype | polyType conop bbtype | btype2 | btype3 | con '{' fieldspecs '}' | con '{' '}' ; btype3: btype2 '!' atype | btype2 polyType | btype3 atype | btype3 '!' atype | btype3 polyType | '(' CONOP ')' ; bbtype: '!' btype | btype | polyType ; fieldspecs: fieldspecs ',' fieldspec | fieldspec ; fieldspec: vars COCO polyType | vars COCO type | vars COCO '!' 
type ; deriving: // empty | DERIVING qconid | DERIVING '(' derivs0 ')' ; derivs0: // empty | derivs ; derivs : derivs ',' qconid | qconid ; prims: prims ',' prim | prim ; prim: var STRINGLIT | var ; crule: context IMPLIES btype2 | btype2 ; irule: context IMPLIES btype2 | btype2 ; dtypes: // empty | dtypes1 ; dtypes1: dtypes1 ',' type | type ; fds: // empty | '|' fds1 ; fds1: fds1 ',' fd | fd ; fd: varids0 ARROW varids0 ; varids0: // empty | varids0 varid ; topType : context IMPLIES topType0 | topType0 ; topType0 : polyType ARROW topType0 | btype1 ARROW topType0 | btype2 ARROW topType0 | btype ; polyType: polyType : '(' polyType ')' | '(' lcontext IMPLIES type ')' ; varids: varids varid | varid ; sigType: context IMPLIES type | type ; context: '(' ')' | btype2 | '(' btype2 ')' | '(' btypes2 ')' | lacks | '(' lacks1 ')' ; lcontext: lacks | '(' lacks1 ')' ; lacks: varid '\\' varid ; lacks1: btypes2 ',' lacks | lacks1 ',' btype2 | lacks1 ',' lacks | btype2 ',' lacks | lacks ; type: type1 | btype2 ; type1: btype1 | bpolyType ARROW type | btype1 ARROW type | btype2 ARROW type ; btype: btype1 | btype2 ; btype1: btype1 atype | atype1 ; btype2: btype2 atype | qconid ; atype: atype1 | qconid ; atype1: varid | '(' ')' | '(' ARROW ')' | '(' type1 ')' | '(' btype2 ')' | '(' tupCommas ')' | '(' btypes2 ')' | '(' typeTuple ')' | '(' tfields ')' | '[' type ']' | '[' ']' | '_' ; btypes2: btypes2 ',' btype2 | btype2 ',' btype2 ; typeTuple: type1 ',' type | btype2 ',' type1 | btypes2 ',' type1 | typeTuple ',' type ; gendecl : INFIXN optDigit ops | INFIXL optDigit ops | INFIXR optDigit ops | vars COCO topType ; optDigit : NUMLIT | // empty ; ops : ops ',' op | op ; vars: vars ',' var | var ; decls: '{' decls0 '}' | '{' decls1 '}' ; decls0: // empty | decls0 ';' | decls1 ';' ; decls1: decls0 decl ; decl: gendecl | funlhs rhs | funlhs COCO type rhs | pat0 rhs ; funlhs: funlhs0 | funlhs1 | npk ; funlhs0: pat10_vI varop pat0 | infixPat varop pat0 | NUMLIT varop pat0 | var varop_pl pat0 | 
var '+' pat0_INT ; funlhs1: '(' funlhs0 ')' apat | '(' funlhs1 ')' apat | '(' npk ')' apat | var apat | funlhs1 apat ; rhs: rhs1 wherePart ; rhs1: '=' exp | gdrhs ; gdrhs: gdrhs gddef | gddef ; gddef: '|' exp0 '=' exp ; wherePart : // empty | WHERE decls ; lwherePart : // empty | WHERE ldecls ; ldecls: '{' ldecls0 '}' | '{' ldecls1 '}' ; ldecls0: // empty | ldecls0 ';' | ldecls1 ';' ; ldecls1: ldecls0 ldecl ; ldecl: decl ; pat: npk | pat_npk ; pat_npk: pat0 COCO type | pat0 ; npk: var '+' NUMLIT ; pat0: var | NUMLIT | pat0_vI ; pat0_INT: var | pat0_vI ; pat0_vI: pat10_vI | infixPat ; infixPat: '-' pat10 | var qconop pat10 | var qconop '-' pat10 | NUMLIT qconop pat10 | NUMLIT qconop '-' pat10 | pat10_vI qconop pat10 | pat10_vI qconop '-' pat10 | infixPat qconop pat10 | infixPat qconop '-' pat10 ; pat10: fpat | apat ; pat10_vI: fpat | apat_vI ; fpat: fpat apat | gcon apat ; apat: NUMLIT | var | apat_vI ; apat_vI: var '@' apat | gcon | qcon '{' patbinds '}' | CHARLIT | STRINGLIT | '_' | '(' pat_npk ')' | '(' npk ')' | '(' pats2 ')' | '[' pats1 ']' | '~' apat ; pats2: pats2 ',' pat | pat ',' pat ; pats1: pats1 ',' pat | pat ; patbinds : // empty | patbinds1 ; patbinds1: patbinds1 ',' patbind | patbind ; patbind: qvar '=' pat | var ; exp: exp0a COCO sigType | exp0 ; exp0: exp0a | exp0b ; exp0a: infixExpa | exp10a ; exp0b: infixExpb | exp10b ; infixExpa : infixExpa qop '-' exp10a | infixExpa qop exp10a | '-' exp10a | exp10a qop '-' exp10a | exp10a qop exp10a ; infixExpb : infixExpa qop '-' exp10b | infixExpa qop exp10b | '-' exp10b | exp10a qop '-' exp10b | exp10a qop exp10b ; exp10a : CASEXP exp OF '{' alts '}' | DO '{' stmts '}' | LET ldecls IN exp | IF exp then_exp else_exp ; exp10b : '\\' pats ARROW exp | LET ldecls IN exp | IF exp then_exp else_exp ; then_exp : ';' THEN exp | THEN exp ; else_exp : ';' ELSE exp | ELSE exp ; pats : pats apat | apat ; appExp : appExp aexp | aexp ; aexp : qvar | qvar '@' aexp | '~' aexp | '_' | gcon | qcon '{' fbinds '}' | aexp '{' 
fbinds '}' | NUMLIT | CHARLIT | STRINGLIT | '(' exp ')' | '(' exps2 ')' | '[' list ']' | '(' exp10a qop ')' | '(' qvarop_mi exp0 ')' | '(' qconop exp0 ')' ; exps2 : exps2 ',' exp | exp ',' exp ; alts : alts1 | ';' alts ; alts1 : alts1 ';' alt | alts1 ';' | alt ; alt : pat altRhs wherePart ; altRhs : guardAlts | ARROW exp ; guardAlts : guardAlts guardAlt | guardAlt ; guardAlt : '|' exp0 ARROW exp ; stmts : stmts1 | ';' stmts ; stmts1 : stmts1 ';' stmt | stmts1 ';' | stmt ; stmt : exp FROM exp | LET ldecls | exp ; fbinds: // empty | fbinds1 ; fbinds1 : fbinds1 ',' fbind | fbind ; fbind: var | qvar '=' exp ; list : exp | exps2 | exp zipquals | exp UPTO exp | exp ',' exp UPTO | exp UPTO | exp ',' exp UPTO exp ; zipquals : zipquals '|' quals | '|' quals ; quals : quals ',' qual | qual ; qual : exp FROM exp | exp | LET ldecls ; gcon : qcon | '(' ')' | '[' ']' | '(' tupCommas ')' ; tupCommas : tupCommas ',' | ',' ; varid : VARID | HIDING | QUALIFIED | ASMOD ; qconid : QCONID | CONID ; var : varid | '(' VAROP ')' | '(' '+' ')' | '(' '-' ')' | '(' '!' ')' | '(' '.' ')' ; qvar : QVARID | '(' QVAROP ')' | var ; con : CONID | '(' CONOP ')' ; qcon : QCONID | '(' QCONOP ')' | con ; varop : '+' | '-' | varop_mipl ; varop_mi : '+' | varop_mipl ; varop_pl : '-' | varop_mipl ; varop_mipl: VAROP | '`' varid '`' | '!' | '.' ; qvarop : '-' | qvarop_mi ; qvarop_mi : QVAROP | '`' QVARID '`' | varop_mi ; conop : CONOP | '`' CONID '`' ; qconop : QCONOP | '`' QCONID '`' | conop ; op : varop | conop ; qop : qvarop | qconop ;
Hugs の文法をほぼそのまま写経してみたが、うまくいかない。。。