# This is part of ladoscope.
# author: P. Lemaire
# version: Mar 15, 2005

This file describes, in a lex/yacc way, the file formats that ladoscope
knows.

General notes:
 . main rule: "one line = one item" (an item being an observation, a
   pattern, or a variable with its cutpoints)
 . Unix, Dos and Mac end-of-lines are supported
 . white spaces are always ignored
 . empty lines are ignored (warning: a line with only white spaces is
   not ignored!)
 . whenever a float number is expected, an integer is fine.
 . numbers must follow a decimal format (e.g. 10 or 2.2); numbers such
   as "1e+2" are not recognized.

==============================================================================
=== instances
==============================================================================

--- lexical tokens -----------------------------------------------------------

tokens:
  | [' ']                            -> (* skip spaces *)
  | ['\n' '\r']+                     -> EOL
  | eof                              -> EOL
  | ('-')? ['0'-'9']+ '.' ['0'-'9']* -> FLOAT
  | ('-')? ['0'-'9']+ as x           -> INT
  | [^ ';' '\n' '\r']* as x          -> STRING x
  | ';'                              -> SEP

--- grammar ------------------------------------------------------------------

observation <- name SEP kind SEP values
            <- kind SEP values

name        <- STRING

kind        <- INT

values      <- num EOL         (* read the last value *)
            <- num SEP values  (* read a value and go on *)
            <- SEP values      (* read a missing value and go on *)
            <- EOL             (* done *)

num         <- INT
            <- FLOAT

notes:
 . the class of an observation must be an integer.
 . a value may be missing.
 . a name may be given. A name is any string not including ";" and that
   cannot be interpreted as an INT or a FLOAT. Spaces are allowed.
   A name is not mandatory, but it cannot be empty (an observation
   ";1;0.2..." is not valid); a name "_" is considered as missing.

==============================================================================
=== models / pandects
==============================================================================

--- lexical tokens -----------------------------------------------------------

tokens:
  | [' ']                            -> (* skip spaces *)
  | ['\n' '\r']+                     -> EOL
  | eof                              -> EOL
  | ('-')? ['0'-'9']+ as x           -> INT
  | ('-')? ['0'-'9']+ '.' ['0'-'9']* -> FLOAT
  | "nan"                            -> FLOAT nan
  | "inf"                            -> FLOAT infinity
  | '<'                              -> INF
  | '>'                              -> SUP
  | '&'                              -> AND
  | '('                              -> OPEN
  | ')'                              -> CLOSE
  | ','                              -> COMA
  | ('x'|'X') (['0'-'9']+ as s)      -> VAR
  | ';'                              -> SEP

--- grammar ------------------------------------------------------------------

pattern         <- characteristics monomes

characteristics <- OPEN INT COMA num COMA num CLOSE
                <- OPEN INT COMA num COMA num COMA num CLOSE

monomes <- monome EOL          (* read the last monome *)
        <- monome AND monomes  (* read one monome and go on *)

monome  <- num INF VAR          (* "1.2 < x3" *)
        <- num SUP VAR          (* "1.2 > x3" *)
        <- VAR SUP num          (* "x3 > 1.2" *)
        <- VAR INF num          (* "x3 < 1.2" *)
        <- num INF VAR INF num  (* "1.2 < x3 < 4.5" *)
        <- num SUP VAR SUP num  (* "4.5 > x3 > 1.2" *)

num     <- INT
        <- FLOAT

notes:
 . the characteristics are, in order: class, homogeneity, prevalence,
   hr (this last one is optional).
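
example (illustration only): the following OCaml sketch reads one
observation line according to the instances grammar above. It is not
ladoscope's actual code; the names (is_decimal, parse_observation) are
made up for this file. It rejects exponent notation such as "1e+2",
treats an empty field as a missing value, and "_" as a missing name.

  (* hypothetical sketch, not ladoscope's code: parse one observation
     line such as "patient 12; 1; 0.5; ; 2" into (name, class, values) *)

  (* INT or FLOAT in plain decimal format: optional '-', digits, at
     most one '.', no exponent ("1e+2" is rejected) *)
  let is_decimal s =
    let body =
      if String.length s > 0 && s.[0] = '-'
      then String.sub s 1 (String.length s - 1)
      else s
    in
    body <> ""
    && String.for_all (fun c -> ('0' <= c && c <= '9') || c = '.') body
    && (match String.index_opt body '.' with
        | None -> true                       (* INT *)
        | Some i ->                          (* FLOAT: digits '.' digits *)
            i > 0 && String.index_from_opt body (i + 1) '.' = None)

  let parse_observation line =
    let fields = List.map String.trim (String.split_on_char ';' line) in
    let name, rest =
      match fields with
      | n :: tl when n <> "" && not (is_decimal n) ->
          ((if n = "_" then None else Some n), tl)  (* "_": missing name *)
      | tl -> (None, tl)                            (* no name given *)
    in
    match rest with
    | kind :: values ->
        ( name,
          int_of_string kind,                (* the class is an integer *)
          List.map
            (fun f ->
              if f = "" then None            (* missing value *)
              else if is_decimal f then Some (float_of_string f)
              else failwith ("bad value: " ^ f))
            values )
    | [] -> failwith "malformed observation line"

  let () =
    let name, kind, values = parse_observation "patient 12; 1; 0.5; ; 2" in
    Printf.printf "name=%s class=%d values=%d\n"
      (Option.value name ~default:"<none>") kind (List.length values)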
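
example (illustration only): the following OCaml sketch normalizes the
monomes of a pattern line into interval bounds, covering the six
surface forms of the monome rule above. It is not ladoscope's actual
code; the names (bounds, parse_monome) are made up for this file, and
the characteristics tuple as well as "nan"/"inf" are left out for
brevity.

  (* hypothetical sketch, not ladoscope's code: a monome constrains one
     variable to an interval; "4.5 > x3 > 1.2" becomes x3 in (1.2, 4.5) *)
  type bounds = { var : int; lo : float option; hi : float option }

  let parse_monome s =
    let s = String.trim s in
    (* locate the variable: "1.2<x3<4.5" -> left "1.2<", var 3, right "<4.5" *)
    let i =
      match String.index_opt s 'x', String.index_opt s 'X' with
      | Some i, _ | None, Some i -> i
      | None, None -> failwith "no variable in monome"
    in
    let j = ref (i + 1) in
    while !j < String.length s && '0' <= s.[!j] && s.[!j] <= '9' do incr j done;
    let var = int_of_string (String.sub s (i + 1) (!j - i - 1)) in
    let side txt =            (* "1.2 <" or "< 4.5" -> (comparator, number) *)
      let txt = String.trim txt in
      if txt = "" then None
      else if txt.[0] = '<' || txt.[0] = '>' then
        Some (txt.[0],
              float_of_string (String.trim (String.sub txt 1 (String.length txt - 1))))
      else
        let n = String.length txt in
        Some (txt.[n - 1],
              float_of_string (String.trim (String.sub txt 0 (n - 1))))
    in
    let left  = side (String.sub s 0 i) in
    let right = side (String.sub s !j (String.length s - !j)) in
    let lo, hi =
      match left, right with
      | Some ('<', a), Some ('<', b) -> Some a, Some b  (* "a < xN < b" *)
      | Some ('>', b), Some ('>', a) -> Some a, Some b  (* "b > xN > a" *)
      | Some ('<', a), None          -> Some a, None    (* "a < xN"     *)
      | Some ('>', b), None          -> None,   Some b  (* "b > xN"     *)
      | None, Some ('>', a)          -> Some a, None    (* "xN > a"     *)
      | None, Some ('<', b)          -> None,   Some b  (* "xN < b"     *)
      | _ -> failwith "malformed monome"
    in
    { var; lo; hi }

  let () =
    "0.5 < x1 & x2 > 0.7 & 4.5 > x3 > 1.2"
    |> String.split_on_char '&'
    |> List.map parse_monome
    |> List.iter (fun { var; lo; hi } ->
         let b = function Some f -> string_of_float f | None -> "-" in
         Printf.printf "x%d in (%s, %s)\n" var (b lo) (b hi))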
==============================================================================
=== cutpointsets
==============================================================================

--- lexical tokens -----------------------------------------------------------

tokens:
  | [' ']                            -> (* skip spaces *)
  | ['\n' '\r']+                     -> EOL
  | eof                              -> EOL
  | ('-')? ['0'-'9']+ '.' ['0'-'9']* -> FLOAT
  | ('-')? ['0'-'9']+ as x           -> INT
  | ':'                              -> SEP

--- grammar ------------------------------------------------------------------

cutpointset <- var SEP values

var         <- INT

values      <- num EOL     (* read the last cutpoint *)
            <- num values  (* read a cutpoint and go on *)

num         <- INT
            <- FLOAT
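
example (illustration only): the following OCaml sketch reads one
cutpointset line according to the grammar above. It is not ladoscope's
actual code; the name parse_cutpointset is made up for this file. A
strict reader would also reject exponent notation with a decimal-format
check like the one sketched for observations.

  (* hypothetical sketch, not ladoscope's code: parse one cutpointset
     line such as "3: 0.5 1.2 7" into (variable, cutpoints) *)
  let parse_cutpointset line =
    match String.split_on_char ':' line with
    | [var; values] ->
        let cuts =
          String.split_on_char ' ' values
          |> List.filter (fun s -> s <> "")  (* white spaces are ignored *)
          |> List.map float_of_string        (* INT or FLOAT *)
        in
        (int_of_string (String.trim var), cuts)
    | _ -> failwith "malformed cutpointset line"

  let () =
    let var, cuts = parse_cutpointset "3: 0.5 1.2 7" in
    Printf.printf "x%d: %d cutpoints\n" var (List.length cuts)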