
000001| {

000002| (** Autogenerated scanner module *)

000003|

000004| module P = Parser (* tokens are defined here *)

000005|

000006| exception Scan of string

000007|

000008| let error msg pos = Error.error "Scanner error" msg pos

000009|

000010| let get = Lexing.lexeme

000011| let getchar = Lexing.lexeme_char

000012| let strlen = String.length

000013| let pos_start = Lexing.lexeme_start

000014| let pos_end = Lexing.lexeme_end

000015|

000016| let keywords = Hashtbl.create 27

000017| let keyword s = Hashtbl.find keywords s

000018| let _ = Array.iter (fun (str,t(*[1]*)ok) -> Hashtbl.add keywords s(*[25]*)tr tok)

000019| [| ("and" , P.AND)

000020| ; ("break" , P.BREAK)

000021| ; ("case" , P.CASE)

000022| ; ("do" , P.DO)

000023| ; ("else" , P.ELSE)

000024| ; ("elseif" , P.ELSEIF)

000025| ; ("end" , P.END)

000026| ; ("for" , P.FOR)

000027| ; ("function" , P.FUNCTION)

000028| ; ("globmatch" , P.GLOBMATCH)

000029| ; ("goto" , P.GOTO)

000030| ; ("if" , P.IF)

000031| ; ("in" , P.IN)

000032| ; ("local" , P.LOCAL)

000033| ; ("nil" , P.NIL)

000034| ; ("true" , P.TRUE)

000035| ; ("false" , P.FALSE)

000036| ; ("not" , P.NOT)

000037| ; ("of" , P.OF)

000038| ; ("or" , P.OR)

000039| ; ("repeat" , P.REPEAT)

000040| ; ("return" , P.RETURN)

000041| ; ("then" , P.THEN)

000042| ; ("until" , P.UNTIL)

000043| ; ("while" , P.WHILE)

000044| |]

000045|

000046| }

000047|

000048| let digit = ['0'-'9']

000049| let alpha = ['a'-'z' 'A'-'Z']

000050| let misc = ['_']

000051|

000052| let sign = ['+' '-']

000053| let exp = ['e''E'] sign? digit+

000054|

000055| let hexdigit = digit | ['a'-'f' 'A'-'F']

000056|

000057| let INT = digit+

000058| let HEX = ("0x"|"0X") hexdigit+

000059| let FLOAT_OR_EXP = (digit+ '.' digit* exp?)

000060| | ('.' digit+ exp?)

000061| | (digit+ exp)

000062|

000063| let deccharcode = ('\\' digit) | ('\\' digit digit) | ('\\' digit digit digit)

000064| let hexcharcode = "\\x" hexdigit hexdigit

000065|

000066| let id = (alpha | misc) (alpha | misc | digit)*

000067| let ws = [' ' '\t' '\r'] (* newline gets extra treatment! *)

000068|

000069| let tab = '\009'

000070| let cr = '\013'

000071| let lf = '\010'

000072| let nl = cr | lf | cr lf

000073|

000074| rule token = parse (* raise Error in case of error *)

000075| eof { P.EOF }

000076| | ws+ { token lexbuf }

000077| | nl { Lexing.new_line lexbuf; token lexbuf }

000078| | id { let s = get lexbuf in

000079| try keyword s with Not_found -> P.NAME s

000080| }

000081| | INT { let s = get lexbuf in P.NUMBER (Ast.Int (int_of_string s)) }

000082| | HEX { let s = get lexbuf in P.NUMBER (Ast.Hex (int_of_string s)) }

000083| | FLOAT_OR_EXP { let s = get lexbuf in P.NUMBER (Ast.Float (float_of_string s)) }

000084|

000085| | ".." { P.CONC }

000086| | "..." { P.DOTS }

000087| | "<=" { P.LE }

000088| | "==" { P.EQ }

000089| | "=>" { P.ARROW }

000090| | ">=" { P.GE }

000091| | "~=" { P.NE }

000092| | '(' { P.LPAR }

000093| | ')' { P.RPAR }

000094| | '*' { P.STAR }

000095| | '+' { P.PLUS }

000096| | ',' { P.COMMA }

000097| | '-' { P.MINUS }

000098| | '.' { P.DOT }

000099| | '#' { P.HASH }

000100| | '/' { P.SLASH }

000101| | '%' { P.MOD }

000102| | ':' { P.COLON }

000103| | ';' { P.SEMI }

000104| | '<' { P.LT }

000105| | '=' { P.GETS }

000106| | '>' { P.GT }

000107| | '[' { P.LSQ }

000108| | ']' { P.RSQ }

000109| | '^' { P.HAT }

000110| | '{' { P.LBRA }

000111| | '}' { P.RBRA }

000112|

000113| (* this token is defined in the LUA lex.c file but is not used

000114| | '~' { fun map -> P.TILDE } *)

000115|

000116| | "--[" '='* '[' { let level = String.length (get lexbuf) - 4 in (* sans --[ [ *)

000117| longcomment level lexbuf }

000118| | "--" nl { Lexing.new_line lexbuf; token lexbuf }

000119| | "--" [^ '[' '\n'] [^ '\n']* { token lexbuf }

000120| | '\'' { shortstring lexbuf "'" (Buffer.create 80) }

000121| | '"' { shortstring lexbuf "\"" (Buffer.create 80) }

000122| | '[' '='* '[' { let level = String.length (get lexbuf) - 2 in (* sans [ [ *)

000123| longstring level lexbuf 1 (Buffer.create 160) }

000124|

000125| | _ { error "illegal character" lexbuf.Lexing.lex_start_p }

000126|

000127| and longcomment level = parse

000128| eof { error "end of file in long comment" lexbuf.Lexing.lex_start_p }

000129| | nl { Lexing.new_line lexbuf; longcomment level lexbuf }

000130| | ']' '='* ']' { let exitlevel = String.length (get lexbuf) - 2 in (* sans ] ] *)

000131| if level = exitlevel

000132| then token lexbuf

000133| else longcomment level lexbuf }

000134| | _ { longcomment level lexbuf }

000135|

000136| and longstring level = parse (* parse a [[ .. ]] string *)

000137| eof { fun n buf -> error "end of file in long string" lexbuf.Lexing.lex_start_p }

000138| | ']' '='* ']' { fun n buf ->

000139| let s = get lexbuf in

000140| let exitlevel = String.length s - 2 in (* sans ] ] *)

000141| if level = exitlevel

000142| then P.LONGSTRING (Ast.Long (Buffer.contents buf))

000143| else ( Buffer.add_string buf s;

000144| longstring level lexbuf (n-1) buf) }

000145| | "[[" { fun n buf ->

000146| ( Buffer.add_string buf "[["

000147| ; longstring level lexbuf (n+1) buf

000148| ) }

000149| | [^']' '[' '\n']+

000150| | ']'

000151| | '[' { fun n buf ->

000152| let s = get lexbuf in

000153| ( Buffer.add_string buf s

000154| ; longstring level lexbuf n buf

000155| ) }

000156| | nl { fun n buf ->

000157| ( Buffer.add_char buf '\n'

000158| ; Lexing.new_line lexbuf (*; nl lexbuf map*)

000159| ; longstring level lexbuf n buf

000160| ) }

000161|

000162| and shortstring = parse (* parse an eos delimited string *)

000163| eof { fun eos buf -> error "end of file in string" lexbuf.Lexing.lex_start_p }

000164| | '\n' { fun eos buf -> error "end of line in string" lexbuf.Lexing.lex_start_p }

000165| | hexcharcode { fun eos buf ->

000166| let s = get lexbuf in

000167| let s = "0" ^ (String.sub s 1 3) in (*starting at 'x', finishing at the second hexdigit*)

000168| let i = int_of_string s in (* now in OCaml's 0xhh format *)

000169| (Buffer.add_char buf (Char.chr i);

000170| shortstring lexbuf eos buf)

000171| }

000172| | deccharcode { fun eos buf ->

000173| let s = get lexbuf in

000174| let s = String.sub s 1 (String.length s - 1) in

000175| let i = int_of_string s in

000176| if i <= 255

000177| then

000178| (Buffer.add_char buf (Char.chr i);

000179| shortstring lexbuf eos buf)

000180| else error "decimal escape too large" lexbuf.Lexing.lex_start_p

000181| }

000182| | '\\' _ { fun eos buf ->

000183| let c = getchar lexbuf 1 in

000184| let k = match c with

000185| | 'a' -> '\007' (* '\a' *)

000186| | 'b' -> '\b'

000187| | 'f' -> '\012' (* '\f' *)

000188| | 'n' -> '\n'

000189| | 'r' -> '\r'

000190| | 't' -> '\t'

000191| | 'v' -> '\011' (* '\v' *)

000192| | '\n' -> '\n'

000193| | '\\' -> '\\'

000194| | '\"' -> '\"'

000195| | '\'' -> '\''

000196| | _ -> error "illegal escape sequence in string" lexbuf.Lexing.lex_start_p

000197| (*also handle: [ ]*)

000198| in (Buffer.add_char buf k;

000199| shortstring lexbuf eos buf)

000200| }

000201| | [^'"' '\'' '\n' '\\']+

000202| { fun eos buf ->

000203| let s = get lexbuf in

000204| ( Buffer.add_string buf s

000205| ; shortstring lexbuf eos buf

000206| )

000207| }

000208| | ['"' '\'' ] { fun eos buf ->

000209| let s = get lexbuf in

000210| if s = eos then

000211| match s with

000212| | "'" -> P.STRING (Ast.Char (Buffer.contents buf))

000213| | _ -> P.STRING (Ast.Normal (Buffer.contents buf))

000214| else

000215| ( Buffer.add_string buf s

000216| ; shortstring lexbuf eos buf

000217| )

000218| }

000219| | _ { fun eos buf -> assert false }

000220|

000221|

000222| {

000223| let str2str s = match s with

000224| | Ast.Normal s -> "\"" ^ s ^ "\""

000225| | Ast.Char s -> "'" ^ s ^ "'"

000226| | Ast.Long s -> "[[" ^ s ^ "]]"

000227| let tok2str = function

000228| | P.AND -> "AND"

000229| | P.ARROW -> "ARROW"

000230| | P.BREAK -> "BREAK"

000231| | P.CASE -> "CASE"

000232| | P.COLON -> "COLON"

000233| | P.COMMA -> "COMMA"

000234| | P.CONC -> "CONC"

000235| | P.DEBUG_PRAGMA n-> "DEBUG_PRAGMA" ^ (string_of_int n)

000236| | P.DO -> "DO"

000237| | P.DOT -> "DOT"

000238| | P.DOTS -> "DOTS"

000239| | P.ELSE -> "ELSE"

000240| | P.ELSEIF -> "ELSEIF"

000241| | P.END -> "END"

000242| | P.EOF -> "EOF"

000243| | P.EQ -> "EQ"

000244| | P.FOR -> "FOR"

000245| | P.FUNCTION -> "FUNCTION"

000246| | P.GE -> "GE"

000247| | P.GETS -> "GETS"

000248| | P.GLOBMATCH -> "GLOBMATCH"

000249| | P.GOTO -> "GOTO"

000250| | P.GT -> "GT"

000251| | P.HAT -> "HAT"

000252| | P.IF -> "IF"

000253| | P.IN -> "IN"

000254| | P.LBRA -> "LBRA"

000255| | P.LE -> "LE"

000256| | P.LOCAL -> "LOCAL"

000257| | P.LPAR -> "LPAR"

000258| | P.LSQ -> "LSQ"

000259| | P.LT -> "LT"

000260| | P.MINUS -> "MINUS"

000261| | P.NAME x -> "NAME("^x^")"

000262| | P.NE -> "NE"

000263| | P.NIL -> "NIL"

000264| | P.TRUE -> "TRUE"

000265| | P.FALSE -> "FALSE"

000266| | P.NOT -> "NOT"

000267| | P.HASH -> "HASH"

000268| | P.NUMBER _ -> "NUMBER"

000269| | P.OF -> "OF"

000270| | P.OR -> "OR"

000271| | P.PLUS -> "PLUS"

000272| | P.RBRA -> "RBRA"

000273| | P.REPEAT -> "REPEAT"

000274| | P.RETURN -> "RETURN"

000275| | P.RPAR -> "RPAR"

000276| | P.RSQ -> "RSQ"

000277| | P.SEMI -> "SEMI"

000278| | P.SLASH -> "SLASH"

000279| | P.MOD -> "MOD"

000280| | P.STAR -> "STAR"

000281| | P.STRING x -> "STRING("^(str2str x)^")"

000282| | P.LONGSTRING x -> "LONGSTRING("^(str2str x)^")"

000283| | P.THEN -> "THEN"

000284| | P.UNARY -> "UNARY"

000285| | P.UNTIL -> "UNTIL"

000286| | P.WHILE -> "WHILE"

000287| | P.WRONGTOKEN -> "WRONGTOKEN"

000288| }