text clean up for ID CS

I’ve made a script that opens a chosen Word document, saves it as text, opens it in TextWrangler, does some clean-up for English text, makes a new InDesign CS document, places that text in it and does a final find and replace.

Here is the script (which almost works)


--Selecting a word document
-- Asks the user for a Word file and derives myNewName: the file's name
-- up to (not including) its first ".", or the whole name when it has no ".".
tell application "Finder"
	--activate
	set AppleScript's text item delimiters to ""
	set myWordFile to choose file with prompt "Choose a word document"
	set myName to name of myWordFile -- file name only; the containing folder is not recorded anywhere
	--set varfile to properties of myWordFile
	set varlength to length of myName -- not referenced again in this script
	if myName contains "." then
		set varoffset to offset of "." in myName -- position of the first "." in the name
		set myNewName to characters 1 thru (varoffset - 1) of myName as string
	else
		set myNewName to myName as string
	end if
	
	
end tell

--Saving word doc as text only
-- Opens the chosen document in Word and runs a VBA SaveAs to write it out as plain text,
-- then closes the front window.
tell application "Microsoft Word"
	--activate
	open myWordFile
	
	-- NOTE(review): FileName is built from myNewName, a bare file name with no folder,
	-- so the save location is presumably whatever folder Word defaults to — confirm.
	-- NOTE(review): the double quotes embedded in the VBA text below appear unescaped as posted;
	-- check against the original script before compiling.
	do Visual Basic "	ActiveDocument.SaveAs FileName:="" & myNewName & ".txt" & "", FileFormat:=wdFormatText, LockComments:=False, Password:="", AddToRecentFiles:=True, WritePassword :="", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:=False, HTMLDisplayOnlyOutput:=False"
	close window 1
end tell

-- english text cleanup
-- myTextFile is a one-item list holding only the file name plus ".txt"; no folder path is included.
set myTextFile to {myNewName & ".txt"}

--The TextWrangler part
-- Opens the exported text file and runs a fixed sequence of find/replace passes on the front
-- text window, then saves it. Later passes rely on the results of earlier ones.
-- NOTE(review): the backslashes and embedded double quotes in the patterns below appear
-- unescaped as posted; check against the original script before compiling.
tell application "TextWrangler"
	--activate
	open file myTextFile
	replace " *\t *" using "\t" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --removes spaces around a tab
	replace "(\s)+" using "\1" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces a run of whitespace characters with a single one (example: two tabs with one tab)
	replace "\.\.\." using "…" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces 3 periods with an ellipsis
	replace "[ \t]*([:;?!…%/])[ ]?" using "\1 " searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --removes tabs/spaces before these punctuation marks and puts one space after them
	replace " - " using "–" searching in text 1 of text window 1 options {starting at top:true} -- replaces space-dash-space with en dash (literal search, no grep)
	replace "(?<!\s)-(?=\d+)" using "–" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces a dash with an en dash when it is not preceded by whitespace and is followed by a digit
	replace " *-- *" using "—" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces -- (with any surrounding spaces) with em dash
	
	--replacing dumb quotes for smart quotes
	replace "(?<![0-9])"(?=\w)" using "“" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight double quote not preceded by a digit and followed by a word character: opening quote
	replace "(?<![0-9 ])"(?=[\s[:punct:]])" using "”" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight double quote not preceded by a digit or space and followed by whitespace or punctuation: closing quote
	replace "(?<![0-9 ])'" using "’" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight single quote not preceded by a digit or space: apostrophe / closing quote
	replace "(?<=\s)'(?=.+)" using "‘" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight single quote preceded by whitespace and followed by more text: opening quote
	
	replace "\s*/\s*" using "/" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} -- removes whitespace characters around forward slash
	
	
	replace "—" using "<0x2009>—<0x2009>" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --puts the marker text <0x2009> on both sides of every em dash
	replace "(?<![0-9])–(?![0-9])" using "<0x2009>–<0x2009>" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} -- puts the marker text <0x2009> around an en dash when it is not between two digits
	save window 1
end tell

--the InDesign part...
-- Creates a new InDesign document, places the cleaned-up text on page 1 at {"6p", "3p"},
-- then runs one search/replace on the <0x2009> marker text inserted by the TextWrangler step.
tell application "InDesign CS"
	set myDocument to make document
	tell page 1 of myDocument
		-- NOTE(review): myTextFile was set to a one-item list holding a bare file name — confirm
		-- that place can resolve it to the exported file.
		place myTextFile place point {"6p", "3p"}
	end tell
	-- presumably "^<" is InDesign's code for a thin space — TODO confirm
	search for "<0x2009>" replacing with "^<"
end tell

The problem is that when I get to TextWrangler and try to open the file, the Finder cannot find that file. So what’s wrong with this script?

Thanks for any feedback!

It looks like you’re giving TextWrangler just the name of the file, not its full path. You’re doing the same thing when you save, but Word is presumably using the default path for you.

You should also get the Finder out of there; it’s serving no useful purpose. Try using something like this:


-- Ask for the Word document and keep its path as plain colon-delimited text.
set myWordFile to choose file with prompt "Choose a word document"
set myWordFilePath to myWordFile as text

-- Remember the current delimiters so they can be put back once the splitting is done.
set oldDelims to AppleScript's text item delimiters

-- Split on ":" -- the last item is the file name, everything before it is the folder.
set AppleScript's text item delimiters to {":"}
set myName to text item -1 of myWordFilePath
set myFolder to (text 1 thru text item -2 of myWordFilePath) & ":"

-- Split the name on "." and drop the last piece (the extension) when there is one.
set AppleScript's text item delimiters to {"."}
if (count of text items of myName) > 1 then set myName to (text 1 thru text item -2 of myName)

set AppleScript's text item delimiters to oldDelims

-- Full path of the text file: same folder, same base name, ".txt" extension.
set newDocPath to (myFolder & myName & ".txt")

– Shane Stanley

Thanks Shane,

Here is my “final” version. If you or others have any comments, please make them. It’s very much appreciated.


-- Ask for the Word document and keep its path as plain colon-delimited text.
set myWordFile to choose file with prompt "Choose a word document"
set myWordFilePath to myWordFile as text

-- Remember the current delimiters so they can be put back once the splitting is done.
set oldDelims to AppleScript's text item delimiters

-- Split on ":" -- the last item is the file name, everything before it is the folder.
set AppleScript's text item delimiters to {":"}
set myName to text item -1 of myWordFilePath
set myFolder to (text 1 thru text item -2 of myWordFilePath) & ":"

-- Split the name on "." and drop the last piece (the extension) when there is one.
set AppleScript's text item delimiters to {"."}
if (count of text items of myName) > 1 then set myName to (text 1 thru text item -2 of myName)

set AppleScript's text item delimiters to oldDelims

-- myNewName is the bare file name of the text export; newDocPath is its full path
-- in the same folder as the Word document.
set myNewName to (myName & ".txt")
set newDocPath to (myFolder & myName & ".txt")

-- Opens the chosen document in Word, saves a plain-text copy at newDocPath,
-- then closes the front window without saving again.
tell application "Microsoft Word"
	--activate
	open myWordFile
	-- Pass the full path (newDocPath) rather than a bare file name: with only a name, Word
	-- picks the folder itself, which need not be the folder newDocPath points at and the
	-- next step opens. Quotes that belong to the VBA text are escaped as \" so the
	-- AppleScript string literal stays intact.
	do Visual Basic "	ActiveDocument.SaveAs FileName:=\"" & newDocPath & "\", FileFormat:=wdFormatText, LockComments:=False, Password:=\"\", AddToRecentFiles:=True, WritePassword :=\"\", ReadOnlyRecommended:=False, EmbedTrueTypeFonts:=False, SaveNativePictureFormat:=False, SaveFormsData:=False, SaveAsAOCELetter:=False, HTMLDisplayOnlyOutput:=False"
	close window 1 saving no
end tell

-- english text cleanup

--global myTextFile

--The TextWrangler part
-- Opens the text file at newDocPath and applies the English typographic clean-up as a fixed
-- sequence of find/replace passes on the front text window, then closes it, saving the changes.
-- Later passes rely on the results of earlier ones, so the order must be kept.
-- AppleScript string literals only accept the escapes \" \\ \n \r \t, so every backslash
-- meant for the grep engine is written doubled and every embedded double quote as \".
tell application "TextWrangler"
	--activate
	open file newDocPath
	replace " *\\t *" using "\\t" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --removes spaces around a tab
	replace "([ \\t])+" using "\\1" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces a run of spaces or tabs with a single character (example: two tabs with one tab)
	replace "(\\r)+" using "\\1" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces a run of carriage returns with one carriage return
	replace "\\.\\.\\." using "…" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces 3 periods with an ellipsis
	replace "[ \\t]*([:;?!…%/])[ ]?" using "\\1 " searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --removes tabs/spaces before these punctuation marks and puts one space after them
	replace " - " using "–" searching in text 1 of text window 1 options {starting at top:true} -- replaces space-dash-space with en dash (literal search, no grep)
	replace "(?<!\\s)-(?=\\d+)" using "–" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces a dash with an en dash when it is not preceded by whitespace and is followed by a digit
	replace " *-- *" using "—" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --replaces -- (with any surrounding spaces) with em dash
	replace " \\r" using "\\r" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --removes a space left just before a carriage return
	
	--replacing dumb quotes for smart quotes
	replace "(?<![0-9])\"(?=\\w)" using "“" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight double quote not preceded by a digit and followed by a word character: opening quote
	replace "(?<![0-9 ])\"(?=[\\s[:punct:]])" using "”" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight double quote not preceded by a digit or space and followed by whitespace or punctuation: closing quote
	replace "(?<![0-9 ])'" using "’" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight single quote not preceded by a digit or space: apostrophe / closing quote
	replace "(?<=\\s)'(?=.+)" using "‘" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --straight single quote preceded by whitespace and followed by more text: opening quote
	
	replace "\\s*/\\s*" using "/" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} -- removes whitespace characters around forward slash
	
	
	-- <0x2009> is written as literal marker text; the InDesign step searches for the same marker.
	replace "—" using "<0x2009>—<0x2009>" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} --puts the marker on both sides of every em dash
	replace "(?<![0-9])–(?![0-9])" using "<0x2009>–<0x2009>" searching in text 1 of text window 1 options {search mode:grep, starting at top:true} -- puts the marker around an en dash when it is not between two digits
	close window 1 saving yes
end tell


--the InDesign part...
-- Creates a new InDesign document, places the cleaned-up text file on page 1 at {"6p", "3p"},
-- then swaps the <0x2009> marker text for "^<".
tell application "InDesign CS"
	set myTextFile to newDocPath -- full path of the text file
	set myDocument to make document
	tell page 1 of myDocument to place myTextFile place point {"6p", "3p"}
	-- presumably "^<" is InDesign's code for a thin space — TODO confirm
	search for "<0x2009>" replacing with "^<"
end tell

I also have one for French, if you’re interested. It’s basically the find and replace steps in both TextWrangler and InDesign that change.