From: Fredrik Karlsson on
Dear group,

I am using the parser tools to generate a snit parser for a file
format. So far, I have been successful using this generator script:

-------
# Generator script: reads the TextGrid PEG grammar and writes a snit-based
# parser class (textgrid_parser) next to the script's parent directory.
package require pt::pgen
package require fileutil

# Resolve every path once, relative to the directory holding this script.
set scriptDir [file dirname [file normalize [info script]]]
set pegFile   [file normalize [file join $scriptDir .. filetypes TextGrid.peg]]
set outFile   [file normalize [file join $scriptDir .. textgrid_parser.tcl]]

# Show the input and output locations, as before.
puts $pegFile
puts $outFile

# fileutil::cat opens, reads and closes the file, so no channel is leaked
# (the previous "read [open ...]" never closed its channel).
set peg [fileutil::cat $pegFile]

# Convert the PEG text into the source code of a snit parser class.
set textgrid_parser [pt::pgen peg $peg snit \
    -file  grammar.peg \
    -name  TextGrid \
    -user  $tcl_platform(user) \
    -class textgrid_parser]

fileutil::writeFile $outFile $textgrid_parser
----

and this test script:

---
# Test driver: loads the generated parser, parses the sample TextGrid file
# and prints the resulting AST (or the parse failure).
package require snit
package require pt::ast
package require pt::pe

# Resolve paths once, relative to the directory holding this script.
set scriptDir [file dirname [file normalize [info script]]]
source [file normalize [file join $scriptDir .. textgrid_parser.tcl]]

# %AUTO% lets snit pick the instance name; the previous
# "textgrid_parser constructor" created an instance literally named
# "constructor".
set p [textgrid_parser %AUTO%]

# Read the whole input and close the channel right away, so it is released
# on the error path as well.
set inFile [open [file normalize \
    [file join $scriptDir .. .. testing testdata testdata.TextGrid]] r]
set currentquery [read $inFile]
close $inFile

puts $currentquery
if {[catch {$p parset $currentquery} inast]} {
    puts [llength $inast]
    puts $inast
    # Only slice the input when the error value carries a numeric location
    # in its second element; any other error (a plain message) is reported
    # verbatim instead of triggering a second "bad index" error.
    set loc [lindex $inast 1]
    if {[string is integer -strict $loc]} {
        set fault [string range $currentquery 0 $loc]
        return -code error "Could not parse file Failed at \"$fault\""
    }
    return -code error "Could not parse file: $inast"
}

puts $inast
puts [pt::ast print $inast]
---
Usually, I get the last successfully parsed part of the file from this
script (through the return value of the statement within the "catch"
statement) which has been very helpful.

Now, however, it seems that the entire file is parsed (at least I
cannot notice a difference between the last parsed point in the file,
and the point where I would get an EOF), but instead of the expected
successful parsing, I get the strange error message "expected start <=
end for range" in $inast.

What am I doing wrong?

I attach the PEG and the file to be parsed below...

I would very much appreciate all the help I could get!

(Sorry for posting this here in case this is rather a PEG problem..
but I just get the feeling that this might be a Tcl problem...?)

/Fredrik

--- PEG ----
PEG TextGrid (file)
# Grammar for TextGrid text files. Start symbol: file.
# A file is a header of "name = value" / "name <flag>" lines followed by
# a data part made of one or more tiers.

file <- header datapart EOF?;

# --- Header -------------------------------------------------------------
header <- (variableexpression / booleanexpression/ emptyline )+ ;
variableexpression <- WS variablename WS equal WS (qstring / real /
        integer) WS EOL;
booleanexpression <- WS variablename WS langle asciistring rangle WS
        EOL;

# --- Values -------------------------------------------------------------
qstring <- (WS quotechar asciistring quotechar WS) / (WS quotechar
        stringvalue quotechar WS) ;
# 'time' added: it is the first field of every point in a point tier.
variablename <- 'File type' / 'Object class' / 'xmin' / 'xmax' /
        'tiers?' / 'size' / 'name' / 'class' / 'text' / 'mark' / 'time';
# Hex digits use the built-in <xdigit>; the nonterminal HexDigit that was
# referenced here before is not defined anywhere in this grammar.
# NOTE(review): this rule can match the empty string (e.g. text = "");
# confirm the parser runtime accepts zero-length symbol matches.
stringvalue <- (!quotechar ("\\" [nrt'"\[\]\\] / "\\" [0-2][0-7]
        [0-7] / "\\" [0-7][0-7]? / "\\" 'u' <xdigit> (<xdigit> (<xdigit>
        <xdigit>?)?)? / !"\\" . / [0-9a-fA-F]) )* ;
asciistring <- <alnum>+;
real <- (<ddigit> / <digit> / DOT )+ ;
integer <- (<ddigit> / <digit>)+ ;

# --- Data part ----------------------------------------------------------
datapart <- tiers (intervaltier / pointtier)+ ;
tiers <- 'item' WS '[]' WS ':' WS EOL ;
intervaltier <- item (variableexpression)+ size (interval)+;
# pointtier was referenced by datapart but never defined; it mirrors
# intervaltier with point entries instead of interval entries.
pointtier <- item (variableexpression)+ size (point)+;
item <- WS 'item' WS '[' WS integer WS ']' WS ':' EOL;
size <- WS ('intervals' / 'points') WS ':' variableexpression;
iheader <- WS 'intervals' WS '[' WS integer WS ']' WS ':' EOL;
pheader <- WS 'points' WS '[' WS integer WS ']' WS ':' EOL;
interval <- WS iheader variableexpression variableexpression
        variableexpression;
point <- WS pheader variableexpression variableexpression;

# --- Lexical helpers (no AST nodes) -------------------------------------
void: emptyline <- WS EOL;
void: qm <- '?';
void: equal <- '=';
void: quotechar <- '"' ;
void: WS <- (" " / "\t")* ;
void: langle <- '<';
void: rangle <- '>';
# CRLF is "\r\n"; it must be tried before the single-character forms so a
# Windows line ending is consumed as one EOL.
void: EOL <- "\r\n" / "\n" / "\r" ;
void: EOF <- !. ;
leaf: DOT <- ".";
END ;
---- /PEG ----

---- TESTFILE ---
File type = "ooTextFile"
Object class = "TextGrid"

xmin = 0
xmax = 1
tiers? <exists>
size = 5
item []:
item [1]:
class = "IntervalTier"
name = "utterance"
xmin = 0
xmax = 1
intervals: size = 5
intervals [1]:
xmin = 0
xmax = 0.028199999999999992
text = ""
intervals [2]:
xmin = 0.028199999999999992
xmax = 0.31277777777777777
text = "first words"
intervals [3]:
xmin = 0.31277777777777777
xmax = 0.36996565656565655
text = ""
intervals [4]:
xmin = 0.36996565656565655
xmax = 0.8056828282828282
text = "second words"
intervals [5]:
xmin = 0.8056828282828282
xmax = 1
text = ""
item [2]:
class = "IntervalTier"
name = "John"
xmin = 0
xmax = 1
intervals: size = 8
intervals [1]:
xmin = 0
xmax = 0.028199999999999992
text = ""
intervals [2]:
xmin = 0.028199999999999992
xmax = 0.14938383838383837
text = "first"
intervals [3]:
xmin = 0.14938383838383837
xmax = 0.31277777777777777
text = "second"
intervals [4]:
xmin = 0.31277777777777777
xmax = 0.36996565656565655
text = ""
intervals [5]:
xmin = 0.36996565656565655
xmax = 0.5074888888888889
text = "third"
intervals [6]:
xmin = 0.5074888888888889
xmax = 0.6559050505050505
text = "fourth"
intervals [7]:
xmin = 0.6559050505050505
xmax = 0.8056828282828282
text = "fifth"
intervals [8]:
xmin = 0.8056828282828282
xmax = 1
text = ""
item [3]:
class = "IntervalTier"
name = "acoustics"
xmin = 0
xmax = 1
intervals: size = 7
intervals [1]:
xmin = 0
xmax = 0.05951717171717171
text = ""
intervals [2]:
xmin = 0.05951717171717171
xmax = 0.1275979797979798
text = "breathy"
intervals [3]:
xmin = 0.1275979797979798
xmax = 0.22563434343434344
text = ""
intervals [4]:
xmin = 0.22563434343434344
xmax = 0.44485454545454545
text = "aspiration"
intervals [5]:
xmin = 0.44485454545454545
xmax = 0.5074888888888889
text = ""
intervals [6]:
xmin = 0.5074888888888889
xmax = 0.6559050505050505
text = "breathy"
intervals [7]:
xmin = 0.6559050505050505
xmax = 1
text = ""
item [4]:
class = "IntervalTier"
name = "Miami"
xmin = 0
xmax = 1
intervals: size = 3
intervals [1]:
xmin = 0
xmax = 0.14938383838383837
text = ""
intervals [2]:
xmin = 0.14938383838383837
xmax = 0.3182242424242424
text = "slightly sloppy"
intervals [3]:
xmin = 0.3182242424242424
xmax = 1
text = ""
item [5]:
class = "TextTier"
name = "bell"
xmin = 0
xmax = 1
points: size = 2
points [1]:
time = 0.18342424242424243
mark = "release"
points [2]:
time = 0.45438585858585856
mark = "voicing"
---- TESTFILE ---