# ws.opt - rbmake options and rules for converting Baen WebScription books
#
# You use this file like this (once for each ISBN book number):
#
#       rbmake -L ISBN -l ws.opt
#
# All comments in the options section must occupy a full line.
#
# These substitution rules allow the user to specify the ISBN number
# either with or without dashes, and we handle the rest.  They cause
# the options that follow to get tweaked according to these rules.
#
# What each group of rules does:
#   "Set-Info"     - the first rule inserts dashes into an undashed
#                    10-character "ISBN=" value, grouping it 1-3-5-1;
#                    the second rule removes the dashes that follow the
#                    "isbn-" prefix of a dashed value.
#   "Book-Filename" ... "Menu-Item"
#                  - removes the three dashes from the first dashed
#                    number found in the option's value.
#   "Cover-Image"  - additionally deletes the text "0671062000.jpg" from
#                    the value (presumably because that title has no usable
#                    cover image -- TODO confirm).
"Set-Info": {
    s/(ISBN=\d)(\d{3})(\d{5})([\dX])/$1-$2-$3-$4/;
    s/(isbn-\d+)-(\d+)-(\d+)-([\dX]+)/$1$2$3$4/;
}
"Book-Filename": "Cover-Image": "Input-File": "Exclude-URLs-Matching": "Menu-Item": {
    s/(\d+)-(\d+)-(\d+)-([\dX]+)/$1$2$3$4/;
}
"Cover-Image": {
    s/0671062000\.jpg//;
}
-- End of optional substitution section ---
# Note that the _Put_Title_Here_ filename causes rbmake magic to happen
# (i.e. the actual title grabbed from the HTML is used for the filename).
# If you want to use the ISBN number for the book's name, change it to $1.
Book-Filename: _Put_Title_Here_
Set-Info: URL=ebook:isbn-$1
Set-Info: ISBN=$1
Cover-Image: $1.jpg
Input-File: $1_toc.htm
Input-File: $1__c_.htm
Follow-Links: yes
Include-Images: yes
Include-Audio: no
Accept-URLs-Matching:
Auto-Accept-Input-File-Dirs: yes
Exclude-URLs-Matching: */$1.jpg
Exclude-URLs-Matching: */$1.htm
Exclude-URLs-Matching: */order_btn.gif
Menu-Item: Contents=$1_toc.htm
Use-Book-Paragraphs: yes
Enhance-Punctuation: "'-
Allow-Old-Style-Page-Breaks: no
Page-Joining: all
Image-Edge-Enhancement: 7 (none, 1 - 9)
Image-Edge-Enhancement: 9 = *_m[\d_].jpg
Substitution-Rule-File: -
--- Substitution-Rule Data Follows ---
# This rule data will tweak some things in a Baen WebScription book before
# the rbmake HTML parser takes over and turns it into an .rb file.  If a
# page matches more than one section, the rules are guaranteed to be run
# in order.
#
# Fix some typos (some of which have already been fixed in the latest
# webscription versions -- re-download your older books when in doubt).
"*/0671578871_toc.htm": { s/>Sentry PeekSentry Peak)%$1\n%; } # Fix the scene breaks in 1632's chapter files. m"/0671578499_+\d+\.htm$": { s/(
\*\s+\*\s+\*)t\s+t\s+t/$1/g; } # Fix the scene breaks in The Legend That Was Earth's chapter files # plus chapter 32 of The Vlad Tapes. m"/0671319450_+\d+\.htm$": "*/0671578782__32.htm": { s/(
\*\s+\*\s+\*)\?/$1/g; } # Fix one scene break in The Philosophical Strangler's chapter 12 file. "*/0671319868__12.htm": { s/(
\*\s+\*\s+\*)u\s+u\s+u/$1/g; } # Fix one scene break in Seer's Blood's chapter 22 file. "*/0671578774__22.htm": { s/(
\*\s+\*\s+\*)Nn/$1/g; } # Fix one scene break in The Fata Morgana's chapter 9 file. "*/0671578766___9.htm": { s%(\*\s+\*\s+\*)%$1%g; } # These rules affect all the HTML files. "*.htm": { # Get rid of some useless NAME/ID tags. s%%%ig; # Make sure ellipses stay as separate chars & don't break across lines. s{\. \. \.} {.\xA0.\xA0.}g; s{\.\xA0\. \.} {.\xA0.\xA0.}g; # Dump a couple things that just clutter up the page. s%]*src *= *"06\d\d\d\d\d\d\d[\dX]\.jpg[^>]*>\s*(?:
)?%%i; s%]+>(Back|Next|Framed|Contents)\s*(\| *|
)?%%ig; s%

\s*

%%ig; # Put the maps in the "Go To" menu. s{($1\n}sig; # If there's only one map, don't call it "Map #1". s/CONTENT="Map #[1_]=/CONTENT="Map=/; s/(CONTENT="Map)(=.*CONTENT="Map #2=)/$1 #1$2/s; } # These rules are only for the chapter files. /_[p\d]+\.htm$/: { # Remove the trailing per-chapter copyright or just the body-ending stuff. s%(?:

]*>(?: )?

\s*)*\s*
.*%%is; s%\n?<\/body>.*%\n%is; # Transform the scene breaks into
tags s{\n?

]*>(\s+| |\xA0)*(\*+(\s+| |\xA0)*)*(

)?\s*\n} {
\n}igs; s{\n?
(\s+| |\xA0)*(\*+(\s+| |\xA0)*)*
\s*} {
\n}igs; s{\n?
(\s+| |\xA0)*(\*+(\s+| |\xA0)*)*
\s*} {
\n}igs; s%(
\n?){2,}%
\n%g; s%(\n?)
%$1%ig; s%
(\n?<(h[1-6]|blockquote))%$1%ig; s%(
)\n?
\n?%$1\n%ig; s%\n?
\s*$%\n%is; # If you like your paragraphs fully justified, uncomment this line. #s%]+>%

%ig; s%&([a-z]+=)%&$1%g; } # Try to fix the extraneous-BR-at-chapter-start bug in a few books. m"/0671(319639|578405|578545|578758|578855)_+\d+\.htm$": { s%(\s*

]*>[^<]+)
%$1%s; } # Fix a few things in Change of Command. "*/0671319639___4.htm": "*/0671578405___4.htm": { s%(sapphire).*?(seas)%$1 $2%s; } "*/0671319639__10.htm": "*/0671578405__10.htm": { s%(Yes\.\s+)if( it were)%$1If$2%; } "*/0671319639__13.htm": "*/0671578405__13.htm": { s%posssibilities%possibilities%; } "*/0671319639__15.htm": "*/0671578405__15.htm": { s%(Jurowski), (but Martin-Jehore)%$1--$2%; } "*/0671319639__19.htm": "*/0671578405__19.htm": { s%(All )though( the questions that)%$1through$2%; } # Improve the names of the "Go To" menu items in Ashes of Victory. m"/0671578545__(c_|49|50)\.htm$": { s/Map #1/Diagram #1/; s/Map #2/Diagram #2/; s/Map #3/Map/; # Kludge around a broken link (in an older version of the book). s/(0671578545)(_m3\.)/$1_$2/; } # Rewrite the Table of Contents page. "*_toc.htm": { # Put common prefixes (The, A, An) at the end of the title s%()(An?) (.*?)(?=)%$1$3, $2%i; s%()The (.*?)(?=)%$1$2, The%i; s%(Apocalypse Troll)(?=)%$1, The%i; # Generate an author META tag so that rbmake knows who the author is. s{(\n$1$2}i; } # Rewrite the copyright page. "*_c_.htm": { # Get rid of all the CENTER tags, since they are often bogus. s%%%g; # This fixes some malformed tags. s%($2%s; s%($2%s; }