経過をここに晒していくよ!
ヽ(゚∀゚)ノウンコー!
lexer を考える
ここを参考に考えるよ!
今回作るやつは
- Unicode は考えない!
- オフサイドルールなし!
- 8 進数なし!
- 16 進数なし!
- 指数(0.23e10 みたいなやつ)なし!
- 文字列内の \ で改行を無効にするやつなし!
- コメントなし!(ちょw
- 全部正規表現使うよ!
工エエェェ(´д`)ェェエエ工
気にしない!
正規表現を考えるよ!
/**
 * Regular expressions for the lexical classes of the Haskell subset handled
 * here (no Unicode, octal, hexadecimal, exponents, string gaps or comments).
 *
 * The symbol class is `! # $ % & * + . / < = > ? @ ^ | - ~ \`. The `-` is
 * escaped inside every character class: written as `|-~` it would be the
 * range `|`..`~`, which matches `}` and does not match `-`.
 */
var regexps = {
  // Numbers.
  digit: /\d/,
  decimal: /\d+/,
  float: /\d+\.\d+/,
  integer: /\d+/,

  // Character and string literals (backslash escapes or numeric escapes).
  char: /'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'/,
  string: /"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"/,

  // Single characters.
  symbol: /[!#$%&*+\.\/<=>\?@^|\-~\\]/,
  small: /[a-z_]/,
  large: /[A-Z]/,

  // Unqualified identifiers and operator symbols.
  varid: /[a-z_]\w*/,
  varsym: /[!#$%&*+\.\/<=>\?@^|\-~\\]+/,
  conid: /[A-Z]\w*/,
  consym: /:[!#$%&*+\.\/<=>\?@^|\-~\\]+:/,

  // Punctuation and reserved operators.
  special: /\(|\)|,|;|\[|\]|`|{|}/,
  reservedop: /=>|\.\.|<-|->|::|:|=|\\|\||@|~/,

  // Any literal: number, character or string.
  literal: /\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"/,

  // Names with an optional single `Module.` qualifier.
  'qvarid or reservedid': /(?:[A-Z]\w*\.)?[a-z_]\w*/,
  qconid: /(?:[A-Z]\w*\.)?[A-Z]\w*/,
  qvarsym: /(?:[A-Z]\w*\.)?[!#$%&*+\.\/<=>\?@^|\-~\\]+/,
  qconsym: /(?:[A-Z]\w*\.)?:[!#$%&*+\.\/<=>\?@^|\-~\\]+:/,

  // Progressive merges of the classes above.
  'qvarid or reservedid or qconid': /(?:[A-Z]\w*\.)?[A-Za-z_]\w*/,
  'qvarsym or qconsym': /(?:[A-Z]\w*\.)?(?::[!#$%&*+\.\/<=>\?@^|\-~\\]+:|[!#$%&*+\.\/<=>\?@^|\-~\\]+)/,
  'qvarsym or qconsym or reservedop': /=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?(?::[!#$%&*+\.\/<=>\?@^|\-~\\]+:|[!#$%&*+\.\/<=>\?@^|\-~\\]+)|:/,

  // One whole token: literal, multi-character reserved operator, name or
  // symbol, and finally any single character as a fallback.
  program: /\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+\.\/<=>\?@^|\-~\\]+:|[!#$%&*+\.\/<=>\?@^|\-~\\]+)|./,
};
できたー゚+.(・∀・)゚+.゚
// Put the Haskell source text to tokenize into `source`.
var source = '';

// Tokenize with a single global match. Each token is a literal, a
// multi-character reserved operator, a (possibly qualified) name or symbol,
// or any other single character.
// The `-` in the symbol class is escaped: `|-~` would be a range that matches
// `}` and misses `-`. `match` returns null when nothing matches, so fall back
// to an empty array.
var tokens = source.match(/\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+\.\/<=>\?@^|\-~\\]+:|[!#$%&*+\.\/<=>\?@^|\-~\\]+)|./g) || [];
今回は kmyacc を使うから yylex を作るよ
// Queue of raw token strings: filled by prepareTokens(), drained by yylex().
var tokens;

// Text of the token most recently handed out by yylex().
// NOTE(review): assumes the kmyacc-generated parser reads the semantic value
// from a global `yylval` -- confirm against the parser skeleton in use.
var yylval;

/**
 * Splits Haskell source text into token strings and stores them in `tokens`.
 *
 * @param {string} source - Haskell source code.
 */
function prepareTokens(source) {
  // Tokenize. `match` returns null when nothing matches (e.g. empty input).
  // The `-` in the symbol class is escaped; unescaped, `|-~` is a range that
  // matches `}` and misses `-`.
  var raw = source.match(/\d+(?:\.\d+)?|'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'|"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"|=>|\.\.|<-|->|::|(?:[A-Z]\w*\.)?([A-Za-z_]\w*|:[!#$%&*+\.\/<=>\?@^|\-~\\]+:|[!#$%&*+\.\/<=>\?@^|\-~\\]+)|./g) || [];

  // Drop whitespace-only tokens. A callback is used because calling a RegExp
  // as a function only ever worked in old Firefox.
  tokens = raw.filter(function (token) {
    return /[^\s]/.test(token);
  });
}

/**
 * Returns the token code of the next token and stores its text in `yylval`.
 *
 * @returns {number} 0 at end of input, a named token constant, or the
 *   character code of the token's first character for single-character tokens.
 */
function yylex() {
  var token = tokens.shift();
  yylval = token;
  if (token === undefined) return 0;

  // keyword
  switch (token) {
    case "infixl": return INFIXL;
    case "infixr": return INFIXR;
    case "infix": return INFIXN;
    case "instance": return TINSTANCE;
    case "class": return TCLASS;
    case "primitive": return PRIMITIVE;
    case "case": return CASEXP;
    case "of": return OF;
    case "if": return IF;
    case "then": return THEN;
    case "else": return ELSE;
    case "where": return WHERE;
    case "type": return TYPE;
    case "data": return DATA;
    case "newtype": return TNEWTYPE;
    case "let": return LET;
    case "in": return IN;
    case "do": return DO;
    case "deriving": return DERIVING;
    case "default": return DEFAULT;
    case "import": return IMPORT;
    case "module": return TMODULE;
    case "hiding": return HIDING;
    case "qualified": return QUALIFIED;
    case "as": return ASMOD;
  }

  // Reserved operators. These must be tested before the generic operator
  // patterns below, otherwise `..` and `=>` are classified as VAROP.
  switch (token) {
    case '::': return COCO;
    case '..': return UPTO;
    case '->': return ARROW;
    case '<-': return FROM;
    case '=>': return IMPLIES;
  }

  // Single characters that the grammar refers to as quoted literals
  // ('=', '@', '\\', '|', '-', '~', '!', '+', '.', '_').
  if (/^[=@\\|\-~!+._]$/.test(token)) return token.charCodeAt(0);

  // literal
  if (/^\d+(?:\.\d+)?$/.test(token)) return NUMLIT;
  if (/^'(?:\\(?:[abfnrtv\\"']|\d+)|[^'])'$/.test(token)) return CHARLIT;
  if (/^"(?:\\(?:[abfnrtv&\\"']|\d+)|[^"])*"$/.test(token)) return STRINGLIT;

  // var or con
  if (/^[!#$%&*+\.\/<=>\?@^|\-~\\]+$/.test(token)) return VAROP;
  if (/^[a-z_]\w*$/.test(token)) return VARID;
  if (/^:[!#$%&*+\.\/<=>\?@^|\-~\\]+:$/.test(token)) return CONOP;
  if (/^[A-Z]\w*$/.test(token)) return CONID;

  // qvar or qcon
  if (/^(?:[A-Z]\w*\.)?[!#$%&*+\.\/<=>\?@^|\-~\\]+$/.test(token)) return QVAROP;
  if (/^(?:[A-Z]\w*\.)?[a-z_]\w*$/.test(token)) return QVARID;
  if (/^(?:[A-Z]\w*\.)?:[!#$%&*+\.\/<=>\?@^|\-~\\]+:$/.test(token)) return QCONOP;
  if (/^(?:[A-Z]\w*\.)?[A-Z]\w*$/.test(token)) return QCONID;

  // Everything else (brackets, braces, comma, semicolon, backquote, ...)
  // is returned as its character code.
  return token.charCodeAt(0);
}
もう正規表現書きたくない(゚∀゚)=3ムッハー
parser を考える
Ψ(゚д゚)Ψ「あ」<(゚д゚)>「い」(q゚з゚)p「う」∠( ゚д゚)/「え」(┌゚Д゚)┌「お」
kmyacc 使うよ
%token を定義するよ
Hugs のソースから持って来たよ
// Terminal symbols of the grammar. Upper-case names are token constants;
// quoted entries are single-character tokens.
%token CASEXP OF DATA TYPE IF %token THEN ELSE WHERE LET IN %token INFIXN INFIXL INFIXR PRIMITIVE TNEWTYPE %token DEFAULT DERIVING DO TCLASS TINSTANCE %token NUMLIT CHARLIT STRINGLIT %token VAROP VARID CONOP CONID %token QVAROP QVARID QCONOP QCONID %token COCO '=' UPTO '@' '\\' %token '|' '-' FROM ARROW '~' %token '!' IMPLIES '(' ',' ')' %token '[' ';' ']' '`' '.' %token TMODULE IMPORT HIDING QUALIFIED ASMOD
yacc のルールを書いていくよくるよ
// Entry point: either a module with a header or a bare module body.
start: module
| modBody
;
// TMODULE modname expspec WHERE '{' modBody '}'
module: TMODULE modname expspec WHERE '{' modBody '}'
;
modname: qconid
;
// A module identifier is a qconid or a string literal.
modid: qconid
| STRINGLIT
;
// Module body: leading ';' are skipped, then top-level declarations,
// imports, or imports followed by top-level declarations.
// NOTE(review): topDecls has no rule in the grammar shown here (only topDecl
// is defined) -- confirm it is defined elsewhere or add it.
modBody: // empty
| ';' modBody
| topDecls
| impDecls
| impDecls ';' topDecls
;
// Optional export list; a trailing comma and an empty list are accepted.
expspec: // empty
| '(' ')'
| '(' ',' ')'
| '(' exports ')'
| '(' exports ',' ')'
;
// Comma-separated, left-recursive list of export items.
exports: exports ',' export
| export
;
// One export item: a qvar, a qcon, a qconid followed by '(' UPTO ')' or by a
// parenthesised name list, or TMODULE followed by a module identifier.
export: qvar
| qcon
| qconid '(' UPTO ')'
| qconid '(' qnames ')'
| TMODULE modid
;
// Possibly empty name list; a lone or trailing comma is accepted.
qnames: // empty
| ','
| qnames1
| qnames1 ','
;
// Non-empty comma-separated list of qname.
qnames1: qnames1 ',' qname
| qname
;
qname: qvar
| qcon
;
// One or more import declarations separated by ';' (a trailing ';' is allowed).
impDecls: impDecls ';' impDecl
| impDecls ';'
| impDecl
;
// IMPORT with optional QUALIFIED, optional ASMOD alias and an import spec.
impDecl: IMPORT modid impspec
| IMPORT modid ASMOD modid impspec
| IMPORT QUALIFIED modid ASMOD modid impspec
| IMPORT QUALIFIED modid impspec
;
// Optional import list, optionally preceded by HIDING.
impspec: // empty
| HIDING '(' imports ')'
| '(' imports ')'
;
// Possibly empty import list; a lone or trailing comma is accepted.
imports: // empty
| ','
| imports1
| imports1 ','
;
// Non-empty comma-separated list of import items.
imports1: imports1 ',' import
| import
;
// One import item: a var, or a CONID optionally followed by '(' UPTO ')' or
// by a parenthesised name list.
import: var
| CONID
| CONID '(' UPTO ')'
| CONID '(' names ')'
;
// Possibly empty name list; a lone or trailing comma is accepted.
names: // empty
| ','
| names1
| names1 ','
;
// Non-empty comma-separated list of name.
names1: names1 ',' name
| name
;
name: var
| con
;
// One top-level declaration, introduced by TYPE, DATA, TNEWTYPE, PRIMITIVE,
// TCLASS, TINSTANCE or DEFAULT.
topDecl: TYPE tyLhs '=' type
| TYPE tyLhs '=' type IN invars
| DATA btype2 '=' constrs deriving
| DATA context IMPLIES tyLhs '=' constrs deriving
| DATA btype2
| DATA context IMPLIES tyLhs
| TNEWTYPE btype2 '=' constr deriving
| TNEWTYPE context IMPLIES tyLhs '=' constr deriving
| PRIMITIVE prims COCO topType
| TCLASS crule fds wherePart
| TINSTANCE irule wherePart
| DEFAULT '(' dtypes ')'
;
// Left-hand side of a type declaration: a CONID followed by zero or more varid.
tyLhs: tyLhs varid
| CONID
;
// Comma-separated list following IN in a TYPE declaration.
invars: invars ',' invar
| invar
;
// A var with an optional COCO type annotation.
invar: var COCO topType
| var
;
// Constructors separated by '|'.
constrs: constrs '|' constr
| constr
;
// One constructor: infix form around a conop, prefix form (btype2 / btype3),
// or a con followed by a braced field list.
constr: '!' btype conop bbtype
| btype1 conop bbtype
| btype2 conop bbtype
| polyType conop bbtype
| btype2
| btype3
| con '{' fieldspecs '}'
| con '{' '}'
;
// Prefix constructor application whose arguments may carry a '!' prefix or be
// a polyType; also a parenthesised CONOP.
btype3: btype2 '!' atype
| btype2 polyType
| btype3 atype
| btype3 '!' atype
| btype3 polyType
| '(' CONOP ')'
;
// Right operand of an infix constructor, optionally prefixed by '!'.
bbtype: '!' btype
| btype
| polyType
;
// Comma-separated field specifications inside braces.
fieldspecs: fieldspecs ',' fieldspec
| fieldspec
;
// vars COCO followed by a polyType, a type, or '!' and a type.
fieldspec: vars COCO polyType
| vars COCO type
| vars COCO '!' type
;
// Optional DERIVING clause: a single qconid or a parenthesised list.
deriving: // empty
| DERIVING qconid
| DERIVING '(' derivs0 ')'
;
derivs0: // empty
| derivs
;
derivs : derivs ',' qconid
| qconid
;
// Comma-separated list of primitive bindings.
prims: prims ',' prim
| prim
;
// A var optionally followed by a string literal.
prim: var STRINGLIT
| var
;
// Head of a TCLASS declaration, with optional context.
crule: context IMPLIES btype2
| btype2
;
// Head of a TINSTANCE declaration, with optional context.
irule: context IMPLIES btype2
| btype2
;
// Possibly empty comma-separated list of types (used by DEFAULT).
dtypes: // empty
| dtypes1
;
dtypes1: dtypes1 ',' type
| type
;
// Optional '|'-introduced list after the class head.
fds: // empty
| '|' fds1
;
fds1: fds1 ',' fd
| fd
;
// varids0 ARROW varids0; either side may be empty.
fd: varids0 ARROW varids0
;
// Zero or more varid.
varids0: // empty
| varids0 varid
;
// Type used after COCO in signatures: an optional context followed by IMPLIES,
// then topType0.
topType : context IMPLIES topType0
| topType0
;
// Right-recursive chain of ARROW-separated operands ending in a btype.
topType0 : polyType ARROW topType0
| btype1 ARROW topType0
| btype2 ARROW topType0
| btype
;
// The original line read `polyType: polyType : ...`, which is not a valid rule
// (two colons), and bpolyType -- referenced by type1 -- had no definition.
// NOTE(review): reconstructed after the Hugs grammar with the `forall`
// alternative left out; confirm that polyType needs no further alternatives.
polyType: bpolyType
;
// Parenthesised polymorphic type, optionally with an lcontext before IMPLIES.
bpolyType: '(' polyType ')'
| '(' lcontext IMPLIES type ')'
;
// One or more varid.
varids: varids varid
| varid
;
// A type with an optional context followed by IMPLIES.
sigType: context IMPLIES type
| type
;
// Context before IMPLIES: empty parentheses, a single btype2 (optionally
// parenthesised), a parenthesised list of btype2, or `lacks` forms.
context: '(' ')'
| btype2
| '(' btype2 ')'
| '(' btypes2 ')'
| lacks
| '(' lacks1 ')'
;
// Context made only of `lacks` forms.
lcontext: lacks
| '(' lacks1 ')'
;
// varid '\\' varid
lacks: varid '\\' varid
;
// Comma-separated list containing at least one `lacks`.
lacks1: btypes2 ',' lacks
| lacks1 ',' btype2
| lacks1 ',' lacks
| btype2 ',' lacks
| lacks
;
type: type1
| btype2
;
// A btype1, or an ARROW type (right-recursive through `type`).
type1: btype1
| bpolyType ARROW type
| btype1 ARROW type
| btype2 ARROW type
;
btype: btype1
| btype2
;
// Application whose head is an atype1.
btype1: btype1 atype
| atype1
;
// Application whose head is a qconid.
btype2: btype2 atype
| qconid
;
atype: atype1
| qconid
;
// Atomic type other than a bare qconid: a varid, parenthesised forms,
// bracketed forms, or '_'.
// NOTE(review): tfields has no rule in the grammar shown here -- confirm it is
// defined elsewhere or drop the alternative.
atype1: varid
| '(' ')'
| '(' ARROW ')'
| '(' type1 ')'
| '(' btype2 ')'
| '(' tupCommas ')'
| '(' btypes2 ')'
| '(' typeTuple ')'
| '(' tfields ')'
| '[' type ']'
| '[' ']'
| '_'
;
// Two or more btype2 separated by commas.
btypes2: btypes2 ',' btype2
| btype2 ',' btype2
;
// Comma-separated list of two or more types containing at least one type1.
typeTuple: type1 ',' type
| btype2 ',' type1
| btypes2 ',' type1
| typeTuple ',' type
;
// Fixity declaration (INFIXN / INFIXL / INFIXR with optional digit) or a
// signature of the form vars COCO topType.
gendecl : INFIXN optDigit ops
| INFIXL optDigit ops
| INFIXR optDigit ops
| vars COCO topType
;
// Optional NUMLIT in a fixity declaration.
optDigit : NUMLIT
| // empty
;
// Comma-separated list of op.
ops : ops ',' op
| op
;
// Comma-separated list of var.
vars: vars ',' var
| var
;
// Braced declaration block.
decls: '{' decls0 '}'
| '{' decls1 '}'
;
// decls0 is empty or ends with ';'; decls1 ends with a decl. Together they
// describe ';'-separated declarations with optional empty entries.
decls0: // empty
| decls0 ';'
| decls1 ';'
;
decls1: decls0 decl
;
// One declaration: a gendecl, a function binding (optionally with a COCO type
// between the left-hand side and the right-hand side), or a pattern binding.
decl: gendecl
| funlhs rhs
| funlhs COCO type rhs
| pat0 rhs
;
funlhs: funlhs0
| funlhs1
| npk
;
// Infix left-hand side: two operands around an operator.
funlhs0: pat10_vI varop pat0
| infixPat varop pat0
| NUMLIT varop pat0
| var varop_pl pat0
| var '+' pat0_INT
;
// Prefix left-hand side: a var or a parenthesised left-hand side followed by
// argument patterns.
funlhs1: '(' funlhs0 ')' apat
| '(' funlhs1 ')' apat
| '(' npk ')' apat
| var apat
| funlhs1 apat
;
// Right-hand side followed by an optional WHERE part.
rhs: rhs1 wherePart
;
rhs1: '=' exp
| gdrhs
;
// One or more guarded definitions.
gdrhs: gdrhs gddef
| gddef
;
// '|' exp0 '=' exp
gddef: '|' exp0 '=' exp
;
wherePart : // empty
| WHERE decls
;
lwherePart : // empty
| WHERE ldecls
;
// Braced block of ldecl; same shape as decls.
ldecls: '{' ldecls0 '}'
| '{' ldecls1 '}'
;
ldecls0: // empty
| ldecls0 ';'
| ldecls1 ';'
;
ldecls1: ldecls0 ldecl
;
ldecl: decl
;
// A pattern: an n+k form or anything else.
pat: npk
| pat_npk
;
// pat0 with an optional COCO type annotation.
pat_npk: pat0 COCO type
| pat0
;
// var '+' NUMLIT
npk: var '+' NUMLIT
;
pat0: var
| NUMLIT
| pat0_vI
;
// Like pat0 but without the NUMLIT alternative.
pat0_INT: var
| pat0_vI
;
// pat0 without the bare var and NUMLIT alternatives.
pat0_vI: pat10_vI
| infixPat
;
// Infix pattern built with qconop; the right operand may be preceded by '-'.
// The first alternative is a '-' followed by a pat10.
infixPat: '-' pat10
| var qconop pat10
| var qconop '-' pat10
| NUMLIT qconop pat10
| NUMLIT qconop '-' pat10
| pat10_vI qconop pat10
| pat10_vI qconop '-' pat10
| infixPat qconop pat10
| infixPat qconop '-' pat10
;
pat10: fpat
| apat
;
// pat10 without the bare var and NUMLIT alternatives.
pat10_vI: fpat
| apat_vI
;
// A gcon applied to one or more apat.
fpat: fpat apat
| gcon apat
;
// Atomic pattern.
apat: NUMLIT
| var
| apat_vI
;
// Atomic pattern other than a bare var or NUMLIT.
apat_vI: var '@' apat
| gcon
| qcon '{' patbinds '}'
| CHARLIT
| STRINGLIT
| '_'
| '(' pat_npk ')'
| '(' npk ')'
| '(' pats2 ')'
| '[' pats1 ']'
| '~' apat
;
// Two or more comma-separated patterns.
pats2: pats2 ',' pat
| pat ',' pat
;
// One or more comma-separated patterns.
pats1: pats1 ',' pat
| pat
;
// Possibly empty comma-separated list of patbind.
patbinds : // empty
| patbinds1
;
patbinds1: patbinds1 ',' patbind
| patbind
;
// qvar '=' pat, or a bare var.
patbind: qvar '=' pat
| var
;
// Expression with an optional COCO sigType annotation (only after an exp0a).
exp: exp0a COCO sigType
| exp0
;
// Expressions are split into an "a" and a "b" family; exp0 accepts both.
exp0: exp0a
| exp0b
;
exp0a: infixExpa
| exp10a
;
exp0b: infixExpb
| exp10b
;
// Infix expression whose last operand is an exp10a. Operands are joined by
// qop and may be preceded by '-'.
infixExpa : infixExpa qop '-' exp10a
| infixExpa qop exp10a
| '-' exp10a
| exp10a qop '-' exp10a
| exp10a qop exp10a
;
// Same shape as infixExpa, but the last operand is an exp10b. The left side
// is still an infixExpa or exp10a.
infixExpb : infixExpa qop '-' exp10b
| infixExpa qop exp10b
| '-' exp10b
| exp10a qop '-' exp10b
| exp10a qop exp10b
;
// Operand that may appear on the left of an operator: a CASEXP or DO block,
// or an application (appExp).
// The LET and IF forms live in exp10b only. Listing them here as well makes
// the same input reducible to both exp10a and exp10b, and without the appExp
// alternative no rule reaches appExp / aexp.
exp10a : CASEXP exp OF '{' alts '}'
| DO '{' stmts '}'
| appExp
;
// Lambda ('\\' pats ARROW exp), LET ... IN, and IF forms.
exp10b : '\\' pats ARROW exp
| LET ldecls IN exp
| IF exp then_exp else_exp
;
// THEN branch; a ';' before THEN is accepted.
then_exp : ';' THEN exp
| THEN exp
;
// ELSE branch; a ';' before ELSE is accepted.
else_exp : ';' ELSE exp
| ELSE exp
;
// One or more apat.
pats : pats apat
| apat
;
// One or more aexp in sequence (left-recursive).
appExp : appExp aexp
| aexp
;
// Atomic expression: names, literals, '@' and '~' forms, braced field
// bindings, parenthesised and bracketed forms, and parenthesised forms with
// an operator on one side.
aexp : qvar
| qvar '@' aexp
| '~' aexp
| '_'
| gcon
| qcon '{' fbinds '}'
| aexp '{' fbinds '}'
| NUMLIT
| CHARLIT
| STRINGLIT
| '(' exp ')'
| '(' exps2 ')'
| '[' list ']'
| '(' exp10a qop ')'
| '(' qvarop_mi exp0 ')'
| '(' qconop exp0 ')'
;
// Two or more comma-separated expressions.
exps2 : exps2 ',' exp
| exp ',' exp
;
// Alternatives of a CASEXP block; leading ';' are skipped.
alts : alts1
| ';' alts
;
// ';'-separated alternatives; a trailing ';' is allowed.
alts1 : alts1 ';' alt
| alts1 ';'
| alt
;
// pat altRhs with an optional WHERE part.
alt : pat altRhs wherePart
;
altRhs : guardAlts
| ARROW exp
;
// One or more guarded alternatives.
guardAlts : guardAlts guardAlt
| guardAlt
;
// '|' exp0 ARROW exp
guardAlt : '|' exp0 ARROW exp
;
// Statements of a DO block; leading ';' are skipped.
stmts : stmts1
| ';' stmts
;
// ';'-separated statements; a trailing ';' is allowed.
stmts1 : stmts1 ';' stmt
| stmts1 ';'
| stmt
;
// exp FROM exp, LET ldecls, or a bare expression.
stmt : exp FROM exp
| LET ldecls
| exp
;
// Possibly empty comma-separated list of fbind.
fbinds: // empty
| fbinds1
;
fbinds1 : fbinds1 ',' fbind
| fbind
;
// A bare var, or qvar '=' exp.
fbind: var
| qvar '=' exp
;
// Contents of '[' ... ']': a single expression, an expression list, an
// expression followed by zipquals, or forms using UPTO.
list : exp
| exps2
| exp zipquals
| exp UPTO exp
| exp ',' exp UPTO
| exp UPTO
| exp ',' exp UPTO exp
;
// One or more '|'-introduced groups of quals.
zipquals : zipquals '|' quals
| '|' quals
;
// Comma-separated list of qual.
quals : quals ',' qual
| qual
;
// exp FROM exp, a bare expression, or LET ldecls.
qual : exp FROM exp
| exp
| LET ldecls
;
// A qcon, or one of the bracket-only forms '(' ')', '[' ']' and
// '(' tupCommas ')'.
gcon : qcon
| '(' ')'
| '[' ']'
| '(' tupCommas ')'
;
// One or more commas.
tupCommas : tupCommas ','
| ','
;
// VARID, or one of the tokens HIDING / QUALIFIED / ASMOD used as an identifier.
varid : VARID
| HIDING
| QUALIFIED
| ASMOD
;
qconid : QCONID
| CONID
;
// A varid, or a parenthesised VAROP / '+' / '-' / '!' / '.'.
var : varid
| '(' VAROP ')'
| '(' '+' ')'
| '(' '-' ')'
| '(' '!' ')'
| '(' '.' ')'
;
// QVARID, a parenthesised QVAROP, or any var.
qvar : QVARID
| '(' QVAROP ')'
| var
;
// CONID or a parenthesised CONOP.
con : CONID
| '(' CONOP ')'
;
// QCONID, a parenthesised QCONOP, or any con.
qcon : QCONID
| '(' QCONOP ')'
| con
;
// Variable operator: '+', '-', or anything in varop_mipl.
varop : '+'
| '-'
| varop_mipl
;
// varop without the '-' alternative.
varop_mi : '+'
| varop_mipl
;
// varop without the '+' alternative.
varop_pl : '-'
| varop_mipl
;
// varop without '+' and '-': VAROP, a backquoted varid, '!' or '.'.
varop_mipl: VAROP
| '`' varid '`'
| '!'
| '.'
;
// '-' or anything in qvarop_mi.
qvarop : '-'
| qvarop_mi
;
// QVAROP, a backquoted QVARID, or anything in varop_mi.
qvarop_mi : QVAROP
| '`' QVARID '`'
| varop_mi
;
// CONOP or a backquoted CONID.
conop : CONOP
| '`' CONID '`'
;
// QCONOP, a backquoted QCONID, or any conop.
qconop : QCONOP
| '`' QCONID '`'
| conop
;
// Operator allowed in fixity declarations.
op : varop
| conop
;
// Operator allowed in infix expressions.
qop : qvarop
| qconop
;
Hugs の文法をほぼ写経してみたが、うまくいかない……