branch: externals/idlwave commit 8d56c8df9854277d649ae74a53be0e417721bb5e Author: jdsmith <jdsmith> Commit: jdsmith <jdsmith>
- Updated for IDL V6.1. - Find additional linked keyword sections, by setting add_keywords, even when scanning Keywords Sections (not just syntax keywords). - Allow linking keywords from routines for which we don't know the type (fun or pro). - Can specify "only keys", separate from "has" for add_keywords (has only works if they show up in the Syntax section). - Trim multi-line HTML tags. - Leading multi-plexers like [XYZ], in addition to {X|Y|Z} have shown up. - Allow parse callbacks to end processing for that section by returning -1. - Remove space around underscores. - Various new special matchers, lowercase keywords, a bogus DATAMINER match against Syntax, etc. --- get_html_rinfo | 256 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 190 insertions(+), 66 deletions(-) diff --git a/get_html_rinfo b/get_html_rinfo index 1f22208e98..e01952ed8a 100755 --- a/get_html_rinfo +++ b/get_html_rinfo @@ -3,10 +3,10 @@ # Program to extract the information from the HTML version of the IDL # manuals (v5.6 and on) and IDL itself, to support IDLWAVE. # -# This version supports IDL >v6.0 +# This version supports IDL v6.1 # # (c) 1999, 2000 Carsten Dominik <domi...@astro.uva.nl> -# (c) 2001-2003 J.D. Smith <jdsm...@as.arizona.edu> +# (c) 2001-2004 J.D. Smith <jdsm...@as.arizona.edu> # # Requires the HTML documentation files distributed with IDL 6.0 or # later, decompiled from HTMLHelp idl.chm with Microsoft's HTML Help @@ -174,6 +174,22 @@ # The data which actually make it into the rinfo file include class, # type, routine,"kwds" with links, and the "extra" file and kwds with # links. Everything else is for internal linking. +# +#============================================================================ +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This file is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. #============================================================================ require 5.004; @@ -227,6 +243,7 @@ open RINFO,">$rinfofile" or die "Cannot open $rinfofile for writing: $!"; # Scan all of the files FILE: foreach $file (@files) { +# next unless $file=~/^CDF.*\.html/; open(FILE,"$path/$file"); local $/=undef; #Slurp mode $file_contents=<FILE>; @@ -320,7 +337,7 @@ foreach $file (@files) { # Require a proper heading if (!$syntax && $part=~/^Syntax/) { $syntax=$parts{$part}; - } elsif ($part=~/^((?:[A-Z][a-z0-9_]+\s+){0,2}Keywords| + } elsif ($part=~/^((?:[A-Z][A-Za-z0-9_]+\s+){0,2}Keywords| Keywords:\s*(?:[A-Z][a-z0-9_]+\s*){1,2})\s*$/x) { $kwds.=$parts{$part} unless $1=~/Thread Pool Keywords/; # Nothing useful in TPool @@ -337,7 +354,6 @@ foreach $file (@files) { # Apply the special syntax matchers $old_syntax=$syntax; &try_specials(); - diag("$syntax\n"); # See if there are reasons to reject or complain about this entry if (@rejects) { @@ -358,7 +374,7 @@ foreach $file (@files) { # Parse the text keywords, and compare to syntax keywords. @syntax_kwds=make_unique(@syntax_kwds); %txt_kwds=parse_keywords($kwds); - + #diag("GOT KWDS SECTION:\n >>>$kwds\n<<<\n\n"); # Complain strenuously if (@complaints) { @@ -367,8 +383,6 @@ foreach $file (@files) { complain($old_syntax,$file,@complaints); } - diag(" SYNTAX KWDS FOUND: \n ".join("\n ",sort @syntax_kwds)."\n"); - diag(" TEXT KWDS FOUND: \n ".join("\n ",sort keys %txt_kwds)."\n"); diag("\n$file($title):<<<<<<<<<<<<<<<<<<\n\n\n"); # Normalize the class/routine cases @@ -461,8 +475,6 @@ foreach $class (keys %properties) { } } -diag("IDLitDataContainer:".Dumper($e{"IDLitDataContainer"})."\n"); - foreach $class (keys %e) { next unless $class; $iname = case_name("method","Init"); @@ -539,18 +551,28 @@ foreach $class (keys %e) { next unless defined($e{$class}{$type}{$name}{add_kwds}); foreach my $add (@{$e{$class}{$type}{$name}{add_kwds}}) { - my (%nokeys,$file_to_add,$keys_to_add,$special_to_add); - my ($has,$aname,$atype,$aclass,$nokeys,$special_sec,$getset)=@$add; + my (%nokeys,%only_keys,$file_to_add,$keys_to_add,$special_to_add); + my ($has,$aname,$atype,$aclass,$only_keys,$nokeys, + $special_sec,$getset)=@$add; # Does it already have the keywords, just requiring a link $has=$has eq "has"; - # Is it a real entry being asked for? + + # Check if we didn't know the routine type at the time of + # addition; default to "pro" if it exists. + unless ($atype) { + if (defined($e{$aclass}{"pro"}{$aname})) { $atype="pro" } + elsif (defined($e{$aclass}{"fun"}{$aname})) { $atype="fun"}; + } + + # Is it a actual existing entry being asked for? next unless defined($e{$aclass}{$atype}{$aname}); foreach (@$nokeys) {$nokeys{$_}++;} - + foreach (@$only_keys) {$only_keys{$_}++;} + my $afile=$e{$aclass}{$atype}{$aname}{file}; - # Look for special section keywords only (e.g. "graphics - # keywords accepted by PLOT") + # Look for "2 degree of separation" special section keywords + # only (e.g. "graphics keywords accepted by PLOT") if($special_sec and defined($e{$aclass}{$atype}{$aname}{extra})) { foreach $file (keys %{$e{$aclass}{$atype}{$aname}{extra}}) { if($e{$aclass}{$atype}{$aname}{extra}{$file}{special} =~ @@ -580,8 +602,10 @@ foreach $class (keys %e) { # Actually add the keys (moving unlinked ones to extra if necessary) foreach (keys %$keys_to_add) { - next if $nokeys{$_}; - if($has) { #exists in kwds already, move and link it + next if (@$only_keys && !$only_keys{$_}) || $nokeys{$_}; + if($has) { + #demand that it exists in kwds already + # if so, move and link it in the extra kwds section next if !defined($e{$class}{$type}{$name}{kwds}{$_}) or $e{$class}{$type}{$name}{kwds}{$_}; # Leave an existing link delete $e{$class}{$type}{$name}{kwds}{$_}; # Move to extra keywords @@ -626,8 +650,8 @@ diag(sprintf("\nProblematic entries: %d rejected, %d complains.\n", write_rinfo_header(); print RINFO "(defconst idlwave-system-routines\n"; print RINFO " '(\n"; -printf STDERR "\n Nr Class Npro Nfun Ntot Nkwd\n"; -printf STDERR "-----------------------------------------------\n"; +printf STDERR "\n Nr Class Npro Nfun Ntot Nkwd\n"; +printf STDERR "----------------------------------------------------------\n"; $classcnt = -1; foreach $class (sort ignoring_case keys %e) { $npro = scalar(keys %{$e{$class}{"pro"}}); @@ -647,7 +671,7 @@ foreach $class (sort ignoring_case keys %e) { $nprotot += $npro; $nfuntot += $nfun; $nclass++; - printf STDERR "%3d %-21s %4d %4d %4d %5d\n", + printf STDERR "%3d %-32s %4d %4d %5d %5d\n", ++$classcnt,$class,$npro,$nfun,$npro+$nfun,$nkwd; foreach $type ("pro","fun") { foreach $name (sort ignoring_case keys %{$e{$class}{$type}}) { @@ -656,8 +680,8 @@ foreach $class (sort ignoring_case keys %e) { } } -print STDERR "-" x 47,"\n"; -printf STDERR "Total %4d %4d %5d %5d\n", +print STDERR "-" x 58,"\n"; +printf STDERR "Total %4d %4d %5d %5d\n", $nprotot,$nfuntot,$nprotot+$nfuntot,$n_keywords_total; printf STDERR "Routines ignored due to -xname: %4d\n",$ignore_name_cnt if $ignore_name_re; @@ -768,7 +792,7 @@ sub clean_up_syntax { $syntax=~s|<h5\s+class="p?Heading4">.*?^</h5>\s*|or|msg; # And headings $syntax=~s/and then,.*//s; $syntax=~s/<br>/\n/g; - $syntax=~s/<.*?>//g; # html tags + $syntax=~s/<.*?>//gs; # html tags $syntax=~s/ / /g; # special chars $syntax=~s/—/ - /g; $syntax=~s/ / /g; @@ -779,7 +803,7 @@ sub clean_up_syntax { $syntax=~s/{(?!(?:,|X\s*\|\s*Y))[^}]*}//g; # Internal notes in {} $syntax=~s/&([gl])t;/$1=="g"?">":"<"/eg; # > and < $syntax=~s|^\s*or\s*||si; - diag("Now using cleaned syntax:>>>>>>>>\n$syntax\n<<<<<<<<<<\n"); +# diag("Now using cleaned syntax:>>>>>>>>\n$syntax\n<<<<<<<<<<\n"); } sub parse_syntax { @@ -807,7 +831,9 @@ sub parse_syntax { } else {diag("parse_syntax: No CLASS found\n");} # Clean out the remarks about BLABLA keywords - $txt =~ s/^.*?\bkeywords\b.*?://gmi; + $txt =~ s/^.*?\bkeywords\b.*?:.*?this.*?accepts.*?keywords.*$//gmi; + $txt =~ s/^.*?\bkeywords\b.*?://gmi; #Some have keywords after them. + # Clean out the "only in..." stuff $txt =~ s/\(only\s*in[^\)]*\)//gi; @@ -820,8 +846,8 @@ sub parse_syntax { # Get all keywords. Keywords are things with `/' before it or with `=' # after it. - diag("TESTING:\n\n$txt\n\n"); - while ($txt =~m!(\{([/XYZ |]*)\})? # Leading XYZ multiplexer + diag("AFTER SYNTAX CLEANUP:\n\n$txt\n\n"); + while ($txt =~m!(\{[/XYZ |]*\}|/?\[XYZ?\])? # Leading XYZ multiplexer (\/)? # Possibly a boolean \b (_?[A-Z][A-Z0-9_]*) # The actual keyword @@ -829,16 +855,16 @@ sub parse_syntax { \s* (=)?!gx ) { - ($xyz,$slash,$identifier,$getset,$equal) = ($2,$3,$4,$6,$7); + ($xyz,$slash,$identifier,$getset,$equal) = ($1,$2,$3,$5,$6); next unless $slash || $equal || ($xyz && $xyz=~m|/|); - # Everything before the first keyword is part of the calling sequence $call = $` unless $call; # Some keywords have a {X|Y|Z} in front which must be expanded if ($xyz) { - @ids = map {tr|/||d; $_ . $identifier} (split(/ *\| */,$xyz)); + $xyz=~tr|XYZ||cd; + @ids=map {$_ . $identifier} split(/(?=[XYZ])/,$xyz); } else { @ids = ($identifier); } @@ -890,7 +916,8 @@ sub parse_syntax { # Parse text into "Heading" Sections, optionally doing something with # the parsed text in a callback. If the callback is passed, it's # return value is tested, and, if a true list, that link is saved for -# each name on the list for return. +# each name on the list for return. If the return value is the scalar +# -1, no more processing is done. sub parse_heading { my ($txt,$heading,$callback)=@_; my %ret,@ret; @@ -908,7 +935,9 @@ sub parse_heading { while (my ($link,$name,$text)=splice(@sections,0,3)) { if($callback) { my @ret=&$callback($link,$name,$text); - if (@ret and $ret[0]) { # More than one name returned + if(@ret) { + last if $ret[0]==-1; + next if $ret[0]==1; map {$ret{$_}=$link} @ret; } } else { $ret{$name}=$link; } @@ -927,18 +956,50 @@ sub parse_keywords { sub { my ($link,$kwd,$text) = @_; my @ret; - return 0 if ($kwd=~/Keywords?/); - return 0 unless + if ($kwd=~/WIDGET_CONTROL\s+Keywords/i) { + push @complaints, + "WIDGET_CONTROL keywords listed as regular keyword."; + return -1; # Abort further processing + } + if ($kwd=~/Keywords?/) { + # Some "extra" keywords are annoyingly listed in + # amidst the regular keywords + my @all=($text=~m{[iI]n\s+addition.*?the\s+ + ((?:[A-Z0-9_]+,\s+)+) + and\s+([A-Z0-9_]+)\s+keywords?\s+to\s+ + the\s+([:A-Z0-9_]+)\s+(pro|fun)}xs); + diag("EXTRA KEYWORDS, Got: \n$text\n\n"); + return 1 unless @all; + + my $type=pop @all; + my ($class,$routine); + $routine=pop @all; + unshift @all,split(/,\s+/,shift @all); + push @complaints, + "Extra keywords to link listed as normal keyword: " . + "$routine: ". join(",",@all); + ($class,$routine)=($routine=~/(?:([a-z][a-z0-9_]+)::?)? + ([a-z][a-z0-9_]+)/ix); + push @add_keywords,["has",$routine,$type,$class]; + return 1; + } + + # Take care of space surrounding underscores here too + $kwd=~s/([A-Z])(?: +_|_ +)([A-Z])/$1_$2/g; + return 1 unless $kwd =~ m{( # The entire keyword text - (?:(\[XYZ?\])?(\!?[A-Z0-9-_]+)# opt. [XYZ] - (?![a-z]) # No lowercase lets - (?:\s*,\s*)? # optional comma - )+) # >=1 of the above + (?:(\[XYZ?\](_?))? # opt. [XYZ] + (\!?[A-Z][A-Z0-9-_]*) # keyword + (?![a-z]) # No lowercase lets + (?:\s*,\s*)? # optional comma + )+) # >=1 of the above (?:\s*\((Get|Set|Get\s*,\s*Set)\)\s*)? # (Get,Set)? }gxs; - my ($kwd,$xyz,$key_base,$getset)=($1,$2,$3,$4); + my ($kwd,$xyz,$underscore,$key_base,$getset)= + ($1,$2,$3,$4,$5); if ($xyz) { + $key_base="_".$key_base if $underscore; $xyz=~tr/XYZ//cd; @ret=map {$_ . $key_base} split(/(?=[XYZ])/,$xyz); } else { @@ -948,7 +1009,7 @@ sub parse_keywords { push @setkwds,@ret if $getset =~ /set/i; return @ret; }); - +# print "GOT KWDS: ",Dumper(%kwds),"\n"; my (@missing_from_syntax,@missing_from_text,%syntax_kwds); foreach $kwd (@syntax_kwds) { @@ -967,6 +1028,9 @@ sub parse_keywords { push @complaints,"Text keywords missing from syntax: " . join(",",@missing_from_syntax) if @missing_from_syntax; + diag(" SYNTAX KWDS FOUND: \n ".join("\n ",sort @syntax_kwds)."\n"); + diag(" TEXT KWDS FOUND: \n ". join("\n ",sort keys %kwds)."\n"); + # No link found in text, but add the missing keywords from Syntax # anyway. map {$kwds{$_}=""} @missing_from_text; @@ -1041,7 +1105,7 @@ sub make_lisp_reader_string { my $call=$e{$class}{$type}{$name}{call}; my ($entry,$kwds); - diag("STARTING $name CALL WITH: $call\n"); +# diag("STARTING $name CALL WITH: $call\n"); # In the calling sequence we want `%s' instead of name and class. # The calling sequence will later be used as format string to make # a calling sequence with the correct version of class and name. @@ -1052,7 +1116,7 @@ sub make_lisp_reader_string { $call =~ s/\b$name\b/%s/gi; } - diag("TESTING CALL WITH: $call\n"); +# diag("TESTING CALL WITH: $call\n"); # Now we make the string which can be parsed by the Lisp reader # It looks like this: # ("NAME" TYPE "CLASS" (system) "CALLING SEQUENCE" @@ -1102,7 +1166,7 @@ sub write_rinfo_header { print RINFO <<EOF; ;;; idlw-rinfo.el --- Routine Information for IDLWAVE ;; Copyright (c) 1999 Carsten Dominik -;; Copyright (c) 1999, 2000, 2001, 2002, 2003 Free Software Foundation +;; Copyright (c) 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation ;; Author: J.D. Smith <jdsmith\@as.arizona.edu> ;; Version: VERSIONTAG @@ -1385,19 +1449,28 @@ sub try_specials { # corresponding keywords to the routine description as well, using the # @add_keywords array, with format: # -# [ ("needs"|"has"),name,type,class,no_keys_reference,specials_only,get_set ] +# [ has,name,type,class,only_keys,allbut_keys,specials_only,get_set ] +# +# has: The string "needs" or "has" depending on whether the routine +# being scanned "needs" keywords added, or already "has" them, but +# just requires linking to the proper section. # -# "needs" or "has": depending on whether the routine being scanned -# "needs" keywords added, or already "has" them, but just requires -# linking to the proper section. # name: The name of the add routine +# # type: The type of the add routine +# # class: The class (if any) of the add routine -# no_keys_reference: A reference to a list of keywords *not* to add. +# +# only_keys: A reference to a list of keywords to add +# +# allbut_keys: A reference to a list of keywords *not* to add. +# # specials_only: A string mentioning one of the special keyword # section names (see %special_sections below), which will limit # added keywords to those with the named special section origin -# (e.g. "Graphics"). +# (e.g. "Graphics"), which are actually separated by 2 degrees +# from the original file (e.g. Surface->Plot->Graphics Keywords). +# # get_set: Either omit, or use one of "get","set", or "get,set" to # specify we need to link that type of keyword from the named # routine. @@ -1602,7 +1675,7 @@ BEGIN { sub { if ($syntax =~ /ATAN.*ATAN/s) { # Write a simpler calling sequence - $syntax = "Result = ATAN([Y,] X)"; + $syntax = "Result = ATAN([Y,] X [, /PHASE] )"; $act = 1; } else { 0; @@ -1745,6 +1818,20 @@ BEGIN { # ------------------------------------------------------------------ # Attach or link keywords from other routines ---------------------- # ------------------------------------------------------------------ + sname("ACCEPTS ALL XXX keywords") => + sub { + my ($routine,$class); + while ($syntax=~/Accepts all ([A-Z_0-9:]+) keywords/gi) { + $act=1; + $routine=$1; + ($class,$routine)= + ($routine=~/(?:([a-z][a-z0-9_]+)::?)?([a-z][a-z0-9_]+)/i); + push @complaints,"Keyword list not complete ($2 keywords omitted)."; + push @add_keywords,["needs",$routine,undef,$class]; + } + return $act; + }, + sname("SURFACE,CONTOUR,PLOT_3DBOX (ADD PLOT KEYWORDS)") => sub { if ($syntax =~ /^\s*(SURFACE|CONTOUR|PLOT_3DBOX)/si) { @@ -1765,18 +1852,6 @@ BEGIN { } }, - sname("IDLitComponent::Get|SetPropertyAttribute") => - sub { - if ($syntax =~ /\s*IDLitComponent::\]?(Get|Set)PropertyAttribute/si) { - # Must add all the get|set keywords from RegisterProperty - push @add_keywords,["needs","RegisterProperty","pro","IDLitComponent", - undef,undef,$1]; - $act=1; - } else { - 0; - } - }, - sname("POLAR_CONTOUR (LINK CONTOUR KEYWORDS)") => sub { if ($syntax =~ /^\s*POLAR_CONTOUR/si) { @@ -1789,7 +1864,7 @@ BEGIN { $nokeys =~ s/and//; my @nokeys = (split(/[^A-Z0-9_]+/,$nokeys)); shift @nokeys unless $nokeys[0]; # Useless material - push @add_keywords,["has","CONTOUR","pro","",\@nokeys]; + push @add_keywords,["has","CONTOUR","pro",undef,undef,\@nokeys]; } 1; } else { @@ -1797,7 +1872,19 @@ BEGIN { } }, - sname("TVSCL (ADD TV)") => + sname("IDLitComponent::Get|SetPropertyAttribute") => + sub { + if ($syntax =~ /\s*IDLitComponent::\]?(Get|Set)PropertyAttribute/si) { + # Must add all the get|set keywords from RegisterProperty + push @add_keywords,["needs","RegisterProperty","pro","IDLitComponent", + undef,undef,undef,$1]; + $act=1; + } else { + 0; + } + }, + + sname("TVSCL (ADD TV)") => sub { if ($syntax =~ /^\s*TVSCL/si) { # Most TV keywords need to be added later. @@ -1813,11 +1900,11 @@ BEGIN { sname("LINK SURFACE KEYWORDS") => sub { if ($kwds =~ m|\s+</a>SURFACE Keywords|) { - # All SURFACE keywords listed in syntax need to be added later. + # All SURFACE keywords listed in syntax need to be linked later. $act = 1; push @complaints, "Keyword list not complete (SURFACE kwds omitted)"; - push @add_keywords,["has","SURFACE","pro",""]; + push @add_keywords,["has","SURFACE","pro"]; 1; } else { 0; @@ -1886,6 +1973,30 @@ BEGIN { } }, + sname("SPACE(S) AROUND UNDERSCORE") => + sub { + if ($syntax =~ /[A-Z]( +_|_ +)[A-Z]/) { + $act = ($syntax =~ s/([A-Z]) *_ *([A-Z])/$1_$2/g); + push @complaints,"Space surrounding underscore"; + 1; + } else { + 0; + } + }, + + + sname("LOWERCASE KEYWORD") => + sub { + if ($syntax =~ /CDF_VARCREATE/) { + $act= ($syntax =~ s/VariableType/VARIABLETYPE/); + $kwds=~ s/VariableType/VARIABLETYPE/; # Ugghh + push @complaints,"Lowercase keyword found"; + 1; + } else { + 0; + } + }, + # ------------------------------------------------------------------ # Keyword omissions ------------------------------------------------ # ------------------------------------------------------------------ @@ -1961,6 +2072,19 @@ BEGIN { } else { 0; } + }, + + + sname("KILL DATAMINER INTRO") => + sub { + if ($syntax =~ /section shows the proper syntax for calling the method/) + { + $act=1; + $syntax=""; + 1; + } else { + 0; + } } );