Extensive document cleanup word 2008

Here’s my applescript for an extensive cleanup of a document, fixing errant spaces, em dashes, ellipses, and so on. It’s great for poorly formatted manuscripts and book text found on the internet. The “"” string is to find the " character; use \ to get applescript to ignore the quote as script but as text.

Enjoy,
Steve

tell application “Microsoft Word”
(* Fix various errors typical in books that are scanned such as em dashes, opening tabs, and more *)

activate
set docRange to text object of active document

--  ellipses  --
clear formatting of find object of docRange
execute find (find object of docRange) find text "." replace with "..." replace replace all --convert elipsis character to 3 periods 	
execute find (find object of docRange) find text ". . ." replace with "..." replace replace all --remove spaces from ellipses 
execute find (find object of docRange) find text ".. ." replace with "..." replace replace all --remove spaces from ellipses 
execute find (find object of docRange) find text ". .." replace with "..." replace replace all --remove spaces from ellipses 
execute find (find object of docRange) find text "... ." replace with "..." replace replace all --change 4 to 3 periods 
execute find (find object of docRange) find text "...." replace with "..." replace replace all --change 4 to 3 periods 
execute find (find object of docRange) find text " ..." replace with "..." replace replace all --remove leading space
execute find (find object of docRange) find text "..." replace with "... " replace replace all --add trailing space
execute find (find object of docRange) find text "^p...^w" replace with "^p..." replace replace all --remove trailing space at the beginning of a paragraph
execute find (find object of docRange) find text "...^w?" replace with "...?" replace replace all --remove trailing space in a ?
execute find (find object of docRange) find text "\"^w...^w" replace with "\"..." replace replace all --remove leading space in a quote
execute find (find object of docRange) find text "...^w\"" replace with "...\"" replace replace all --remove trailing space in a quote

-- em dashes--
clear formatting of find object of docRange
execute find (find object of docRange) find text "^~" replace with "-" replace replace all -- nonbreaking hyphen
execute find (find object of docRange) find text "^-" replace with "-" replace replace all -- optional hyphen
execute find (find object of docRange) find text "--" replace with "^+" replace replace all -- double dash
execute find (find object of docRange) find text ",^+" replace with "^+" replace replace all -- comma dash
execute find (find object of docRange) find text " - " replace with "^+" replace replace all -- space dash space
execute find (find object of docRange) find text "- " replace with "^= " replace replace all -- dash space
execute find (find object of docRange) find text " -" replace with " ^=" replace replace all -- space dash 
execute find (find object of docRange) find text "^p-" replace with "^p^+" replace replace all --starting dash
execute find (find object of docRange) find text "-^p" replace with "^+^p" replace replace all --ending dash
execute find (find object of docRange) find text "-\"" replace with "^+\"" replace replace all -- dash quote
execute find (find object of docRange) find text "\"-" replace with "\"^+" replace replace all -- quote dash
execute find (find object of docRange) find text " ^+" replace with "^+" replace replace all -- space dash
execute find (find object of docRange) find text "^+ " replace with "^+" replace replace all -- dash space
execute find (find object of docRange) find text "^+-" replace with "^+" replace replace all -- em plus hyphen
execute find (find object of docRange) find text "^+^+" replace with "^+" replace replace all -- duplicates

--odd quotes
--replace slanted single quote 
execute find (find object of docRange) find text "`" replace with "'" replace replace all
--replace double single quotes 
execute find (find object of docRange) find text "''" replace with "\"" replace replace all

--other stuff
execute find (find object of docRange) find text ";\"" replace with ",\"" replace replace all
execute find (find object of docRange) find text ";'" replace with ",\"" replace replace all

-- remove leading and trailing tabs and spaces --
clear formatting of find object of docRange
execute find (find object of docRange) find text " \"^p" replace with "\"^p" replace replace all -- leading space
execute find (find object of docRange) find text " ," replace with "," replace replace all --remove leading space
execute find (find object of docRange) find text " ." replace with "." replace replace all --remove leading space
execute find (find object of docRange) find text " ?" replace with "?" replace replace all --remove leading space

execute find (find object of docRange) find text "^p^t" replace with "^p" replace replace all -- remove leading tabs
execute find (find object of docRange) find text "^t^p" replace with "^p" replace replace all --remove trailing tab
execute find (find object of docRange) find text "^p^w" replace with "^p" replace replace all -- remove white space
execute find (find object of docRange) find text "^w^p" replace with "^p" replace replace all -- remove white space
-- remove all double spaces --
execute find (find object of docRange) find text " ^w" replace with " " replace replace all
-- remove extra returns --
execute find (find object of docRange) find text "^p^p^p^m" replace with "^p^m" replace replace all
execute find (find object of docRange) find text "^p^p^m" replace with "^p^m" replace replace all

end tell