000001| {
000002| (** Autogenerated scanner module *)
000003|
000004| module P = Parser (* tokens are defined here *)
000005|
000006| exception Scan of string
000007|
000008| let error msg pos = Error.error "Scanner error" msg pos
000009|
000010| let get = Lexing.lexeme
000011| let getchar = Lexing.lexeme_char
000012| let strlen = String.length
000013| let pos_start = Lexing.lexeme_start
000014| let pos_end = Lexing.lexeme_end
000015|
000016| let keywords = Hashtbl.create 27
000017| let keyword s = Hashtbl.find keywords s
000018| let _ = Array.iter (fun (str,t(*[1]*)ok) -> Hashtbl.add keywords s(*[25]*)tr tok)
000019| [| ("and" , P.AND)
000020| ; ("break" , P.BREAK)
000021| ; ("case" , P.CASE)
000022| ; ("do" , P.DO)
000023| ; ("else" , P.ELSE)
000024| ; ("elseif" , P.ELSEIF)
000025| ; ("end" , P.END)
000026| ; ("for" , P.FOR)
000027| ; ("function" , P.FUNCTION)
000028| ; ("globmatch" , P.GLOBMATCH)
000029| ; ("goto" , P.GOTO)
000030| ; ("if" , P.IF)
000031| ; ("in" , P.IN)
000032| ; ("local" , P.LOCAL)
000033| ; ("nil" , P.NIL)
000034| ; ("true" , P.TRUE)
000035| ; ("false" , P.FALSE)
000036| ; ("not" , P.NOT)
000037| ; ("of" , P.OF)
000038| ; ("or" , P.OR)
000039| ; ("repeat" , P.REPEAT)
000040| ; ("return" , P.RETURN)
000041| ; ("then" , P.THEN)
000042| ; ("until" , P.UNTIL)
000043| ; ("while" , P.WHILE)
000044| |]
000045|
000046| }
000047|
000048| let digit = ['0'-'9']
000049| let alpha = ['a'-'z' 'A'-'Z']
000050| let misc = ['_']
000051|
000052| let sign = ['+' '-']
000053| let exp = ['e''E'] sign? digit+
000054|
000055| let hexdigit = digit | ['a'-'f' 'A'-'F']
000056|
000057| let INT = digit+
000058| let HEX = ("0x"|"0X") hexdigit+
000059| let FLOAT_OR_EXP = (digit+ '.' digit* exp?)
000060| | ('.' digit+ exp?)
000061| | (digit+ exp)
000062|
000063| let deccharcode = ('\\' digit) | ('\\' digit digit) | ('\\' digit digit digit)
000064| let hexcharcode = "\\x" hexdigit hexdigit
000065|
000066| let id = (alpha | misc) (alpha | misc | digit)*
000067| let ws = [' ' '\t' '\r'] (* newline gets extra treatment! *)
000068|
000069| let tab = '\009'
000070| let cr = '\013'
000071| let lf = '\010'
000072| let nl = cr | lf | cr lf
000073|
000074| rule token = parse (* raise Error in case of error *)
000075| eof { P.EOF }
000076| | ws+ { token lexbuf }
000077| | nl { Lexing.new_line lexbuf; token lexbuf }
000078| | id { let s = get lexbuf in
000079| try keyword s with Not_found -> P.NAME s
000080| }
000081| | INT { let s = get lexbuf in P.NUMBER (Ast.Int (int_of_string s)) }
000082| | HEX { let s = get lexbuf in P.NUMBER (Ast.Hex (int_of_string s)) }
000083| | FLOAT_OR_EXP { let s = get lexbuf in P.NUMBER (Ast.Float (float_of_string s)) }
000084|
000085| | ".." { P.CONC }
000086| | "..." { P.DOTS }
000087| | "<=" { P.LE }
000088| | "==" { P.EQ }
000089| | "=>" { P.ARROW }
000090| | ">=" { P.GE }
000091| | "~=" { P.NE }
000092| | '(' { P.LPAR }
000093| | ')' { P.RPAR }
000094| | '*' { P.STAR }
000095| | '+' { P.PLUS }
000096| | ',' { P.COMMA }
000097| | '-' { P.MINUS }
000098| | '.' { P.DOT }
000099| | '#' { P.HASH }
000100| | '/' { P.SLASH }
000101| | '%' { P.MOD }
000102| | ':' { P.COLON }
000103| | ';' { P.SEMI }
000104| | '<' { P.LT }
000105| | '=' { P.GETS }
000106| | '>' { P.GT }
000107| | '[' { P.LSQ }
000108| | ']' { P.RSQ }
000109| | '^' { P.HAT }
000110| | '{' { P.LBRA }
000111| | '}' { P.RBRA }
000112|
000113| (* this token is defined in the LUA lex.c file but is not used
000114| | '~' { fun map -> P.TILDE } *)
000115|
000116| | "--[" '='* '[' { let level = String.length (get lexbuf) - 4 in (* sans --[ [ *)
000117| longcomment level lexbuf }
000118| | "--" nl { Lexing.new_line lexbuf; token lexbuf }
000119| | "--" [^ '[' '\n'] [^ '\n']* { token lexbuf }
000120| | '\'' { shortstring lexbuf "'" (Buffer.create 80) }
000121| | '"' { shortstring lexbuf "\"" (Buffer.create 80) }
000122| | '[' '='* '[' { let level = String.length (get lexbuf) - 2 in (* sans [ [ *)
000123| longstring level lexbuf 1 (Buffer.create 160) }
000124|
000125| | _ { error "illegal character" lexbuf.Lexing.lex_start_p }
000126|
000127| and longcomment level = parse
000128| eof { error "end of file in long comment" lexbuf.Lexing.lex_start_p }
000129| | nl { Lexing.new_line lexbuf; longcomment level lexbuf }
000130| | ']' '='* ']' { let exitlevel = String.length (get lexbuf) - 2 in (* sans ] ] *)
000131| if level = exitlevel
000132| then token lexbuf
000133| else longcomment level lexbuf }
000134| | _ { longcomment level lexbuf }
000135|
000136| and longstring level = parse (* parse a [[ .. ]] string *)
000137| eof { fun n buf -> error "end of file in long string" lexbuf.Lexing.lex_start_p }
000138| | ']' '='* ']' { fun n buf ->
000139| let s = get lexbuf in
000140| let exitlevel = String.length s - 2 in (* sans ] ] *)
000141| if level = exitlevel
000142| then P.LONGSTRING (Ast.Long (Buffer.contents buf))
000143| else ( Buffer.add_string buf s;
000144| longstring level lexbuf (n-1) buf) }
000145| | "[[" { fun n buf ->
000146| ( Buffer.add_string buf "[["
000147| ; longstring level lexbuf (n+1) buf
000148| ) }
000149| | [^']' '[' '\n']+
000150| | ']'
000151| | '[' { fun n buf ->
000152| let s = get lexbuf in
000153| ( Buffer.add_string buf s
000154| ; longstring level lexbuf n buf
000155| ) }
000156| | nl { fun n buf ->
000157| ( Buffer.add_char buf '\n'
000158| ; Lexing.new_line lexbuf (*; nl lexbuf map*)
000159| ; longstring level lexbuf n buf
000160| ) }
000161|
000162| and shortstring = parse (* parse an eos delimited string *)
000163| eof { fun eos buf -> error "end of file in string" lexbuf.Lexing.lex_start_p }
000164| | '\n' { fun eos buf -> error "end of line in string" lexbuf.Lexing.lex_start_p }
000165| | hexcharcode { fun eos buf ->
000166| let s = get lexbuf in
000167| let s = "0" ^ (String.sub s 1 3) in (*starting at 'x', finishing at the second hexdigit*)
000168| let i = int_of_string s in (* now in OCaml's 0xhh format *)
000169| (Buffer.add_char buf (Char.chr i);
000170| shortstring lexbuf eos buf)
000171| }
000172| | deccharcode { fun eos buf ->
000173| let s = get lexbuf in
000174| let s = String.sub s 1 (String.length s - 1) in
000175| let i = int_of_string s in
000176| if i <= 255
000177| then
000178| (Buffer.add_char buf (Char.chr i);
000179| shortstring lexbuf eos buf)
000180| else error "decimal escape too large" lexbuf.Lexing.lex_start_p
000181| }
000182| | '\\' _ { fun eos buf ->
000183| let c = getchar lexbuf 1 in
000184| let k = match c with
000185| | 'a' -> '\007' (* '\a' *)
000186| | 'b' -> '\b'
000187| | 'f' -> '\012' (* '\f' *)
000188| | 'n' -> '\n'
000189| | 'r' -> '\r'
000190| | 't' -> '\t'
000191| | 'v' -> '\011' (* '\v' *)
000192| | '\n' -> '\n'
000193| | '\\' -> '\\'
000194| | '\"' -> '\"'
000195| | '\'' -> '\''
000196| | _ -> error "illegal escape sequence in string" lexbuf.Lexing.lex_start_p
000197| (*also handle: [ ]*)
000198| in (Buffer.add_char buf k;
000199| shortstring lexbuf eos buf)
000200| }
000201| | [^'"' '\'' '\n' '\\']+
000202| { fun eos buf ->
000203| let s = get lexbuf in
000204| ( Buffer.add_string buf s
000205| ; shortstring lexbuf eos buf
000206| )
000207| }
000208| | ['"' '\'' ] { fun eos buf ->
000209| let s = get lexbuf in
000210| if s = eos then
000211| match s with
000212| | "'" -> P.STRING (Ast.Char (Buffer.contents buf))
000213| | _ -> P.STRING (Ast.Normal (Buffer.contents buf))
000214| else
000215| ( Buffer.add_string buf s
000216| ; shortstring lexbuf eos buf
000217| )
000218| }
000219| | _ { fun eos buf -> assert false }
000220|
000221|
000222| {
000223| let str2str s = match s with
000224| | Ast.Normal s -> "\"" ^ s ^ "\""
000225| | Ast.Char s -> "'" ^ s ^ "'"
000226| | Ast.Long s -> "[[" ^ s ^ "]]"
000227| let tok2str = function
000228| | P.AND -> "AND"
000229| | P.ARROW -> "ARROW"
000230| | P.BREAK -> "BREAK"
000231| | P.CASE -> "CASE"
000232| | P.COLON -> "COLON"
000233| | P.COMMA -> "COMMA"
000234| | P.CONC -> "CONC"
000235| | P.DEBUG_PRAGMA n-> "DEBUG_PRAGMA" ^ (string_of_int n)
000236| | P.DO -> "DO"
000237| | P.DOT -> "DOT"
000238| | P.DOTS -> "DOTS"
000239| | P.ELSE -> "ELSE"
000240| | P.ELSEIF -> "ELSEIF"
000241| | P.END -> "END"
000242| | P.EOF -> "EOF"
000243| | P.EQ -> "EQ"
000244| | P.FOR -> "FOR"
000245| | P.FUNCTION -> "FUNCTION"
000246| | P.GE -> "GE"
000247| | P.GETS -> "GETS"
000248| | P.GLOBMATCH -> "GLOBMATCH"
000249| | P.GOTO -> "GOTO"
000250| | P.GT -> "GT"
000251| | P.HAT -> "HAT"
000252| | P.IF -> "IF"
000253| | P.IN -> "IN"
000254| | P.LBRA -> "LBRA"
000255| | P.LE -> "LE"
000256| | P.LOCAL -> "LOCAL"
000257| | P.LPAR -> "LPAR"
000258| | P.LSQ -> "LSQ"
000259| | P.LT -> "LT"
000260| | P.MINUS -> "MINUS"
000261| | P.NAME x -> "NAME("^x^")"
000262| | P.NE -> "NE"
000263| | P.NIL -> "NIL"
000264| | P.TRUE -> "TRUE"
000265| | P.FALSE -> "FALSE"
000266| | P.NOT -> "NOT"
000267| | P.HASH -> "HASH"
000268| | P.NUMBER _ -> "NUMBER"
000269| | P.OF -> "OF"
000270| | P.OR -> "OR"
000271| | P.PLUS -> "PLUS"
000272| | P.RBRA -> "RBRA"
000273| | P.REPEAT -> "REPEAT"
000274| | P.RETURN -> "RETURN"
000275| | P.RPAR -> "RPAR"
000276| | P.RSQ -> "RSQ"
000277| | P.SEMI -> "SEMI"
000278| | P.SLASH -> "SLASH"
000279| | P.MOD -> "MOD"
000280| | P.STAR -> "STAR"
000281| | P.STRING x -> "STRING("^(str2str x)^")"
000282| | P.LONGSTRING x -> "LONGSTRING("^(str2str x)^")"
000283| | P.THEN -> "THEN"
000284| | P.UNARY -> "UNARY"
000285| | P.UNTIL -> "UNTIL"
000286| | P.WHILE -> "WHILE"
000287| | P.WRONGTOKEN -> "WRONGTOKEN"
000288| }