Hi,
I have a script that takes links from Google search pages and stores the results in a list called urlList. The script then cURLs each page on that list and compares the resulting HTML against a list of terms in an array called urlArray. If a term in urlArray appears in the HTML of a page, the page address is added to the corresponding element of urlArray. The array is then written to a text file.
Although the script works, it takes about 5 mins or so on my computer to collect and search the HTML text of 100 pages for two search terms. I am hoping that it can be slightly faster than this.
Is there some way of making this script more efficient? Possibly a way to speed up referring to the lists?
Having searched this forum, I have come across this discussion ( http://bbs.applescript.net/viewtopic.php?t=6233 ) that seems to relate to this, but I must admit to getting a little bit lost. Is what is being said that you define the lists as properties and then just refer to them as “my ‘listName’” everywhere to make the script run much faster?
My script follows. Any help appreciated. Thanks
John M
-- Setup: collect the Google query, the page-text terms to look for, and the
-- approximate number of result pages to examine.
--
-- urlList  : flat list of result URLs (filled in by the link-collection step).
-- urlArray : list of lists; item 1 of each inner list is a search term (or the
--            "None of the search terms" label for the last entry) and any
--            further items are the URLs of pages filed under that entry.
set urlList to {}
set urlArray to {}
set theChunks to 10 -- either 10, 25, 50 or 100
set theRecords to 30 -- should be a multiple of theChunks var
set theSearchTerm to text returned of (display dialog ¬
	"What search words to use? (use + as word spacer)" default answer "first+second+third" buttons {" OK "} default button 1)
set listNumber to 1
repeat -- keep asking for page-text terms until the user presses "Enough"
	set myText to (display dialog "What page text are you looking for?" default answer "text item " & listNumber buttons {"More", "Enough", "Cancel"} default button 1)
	-- Appending with "set end of" avoids copying the whole list on every pass.
	set end of urlArray to {text returned of myText}
	if button returned of myText is "Enough" then exit repeat
	set listNumber to listNumber + 1
end repeat
-- Catch-all bucket for pages that match none of the terms; must stay last.
set end of urlArray to {"None of the search terms"}
--set theChunks to (choose from list {10, 25, 50, 100} default items {10} with prompt "choose how many records per page") as integer
set recordsReply to text returned of (display dialog "Approximately how many web pages would you like to find?" default answer (theRecords as text) buttons {"OK"} default button 1)
try
	set theRecords to recordsReply as integer
on error
	-- Reply was not a whole number: keep the default value of theRecords set above.
end try
--repeat for required pages
-- Fetches Google result pages in steps of theChunks and appends every
-- absolute ("http…") link target found after an "<a href=" marker to urlList.
repeat with myRecords from 0 to (theRecords - theChunks) by theChunks
	--get search page code
	-- "quoted form of" lets the shell see the user agent and the URL as single
	-- arguments, whatever characters the search term contains.
	set searchUrl to "http://www.google.com/search?q=" & theSearchTerm & "&num=" & theChunks & "&start=" & myRecords
	set myCode to do shell script "curl -A " & quoted form of "Mozilla/3.0 (Win95; I)" & " " & quoted form of searchUrl
	--get urlList from theUrl
	if (offset of "<a href=" in myCode) is not 0 then
		-- Remember the caller's delimiters so they can be restored afterwards.
		set myDelimiters to AppleScript's text item delimiters
		set AppleScript's text item delimiters to {"<a href="}
		set anList to text items of myCode
		set AppleScript's text item delimiters to myDelimiters
		-- Item 1 is the text before the first link, so start at item 2.
		repeat with myLink from 2 to (length of anList)
			set MyString to (item myLink of anList) as string
			if MyString starts with "http" then
				-- The URL ends at the first space or ">" after it, whichever comes first.
				set myUrl to MyString
				set spaceAt to offset of " " in myUrl
				if spaceAt > 1 then set myUrl to text 1 thru (spaceAt - 1) of myUrl
				set closeAt to offset of ">" in myUrl
				if closeAt > 1 then set myUrl to text 1 thru (closeAt - 1) of myUrl
				-- "set end of" appends in place instead of rebuilding the list.
				set end of urlList to myUrl
			end if
		end repeat
	else
		-- No links on this results page: stop paging, but keep the links
		-- already collected so they can still be checked and saved.
		exit repeat
	end if
	--end get urlList from theUrl
end repeat
--end repeat for required pages
--check urlList for searchText(s)
-- Downloads every non-Google URL in urlList and files it under each entry of
-- urlArray whose term (item 1 of the entry) occurs in the page source. Pages
-- matching no term are filed under the last entry of urlArray.
repeat with urlRef in urlList
	-- The loop variable is a reference into urlList; work with its value.
	set newUrl to contents of urlRef
	if newUrl does not contain "google" then
		try
			-- "quoted form of" stops characters in a scraped URL from being
			-- interpreted by the shell.
			set myNewCode to do shell script "curl -A " & quoted form of "Mozilla/3.0 (Win95; I)" & " " & quoted form of newUrl
			set searchFlag to false
			-- The last entry of urlArray is the catch-all, so it is not tested.
			repeat with myTextNum from 1 to ((length of urlArray) - 1)
				if myNewCode contains item 1 of item myTextNum of urlArray then
					set end of item myTextNum of urlArray to newUrl
					set searchFlag to true
				end if
			end repeat
			if not searchFlag then set end of last item of urlArray to newUrl
		end try
		-- Best effort: a page that cannot be fetched (curl error) is skipped.
	end if
end repeat
--end check urlList
-- save to file
-- Writes the search URL, then one section per entry of urlArray: the entry's
-- label (item 1) followed by the page URLs filed under it, or "No pages".
set myFilePath to choose file name default name "result pages.txt"
set saveFile to (open for access myFilePath with write permission)
try
	-- Truncate first so replacing an existing, longer file leaves no stale text.
	set eof saveFile to 0
	write ("From search pages:" & return & "http://www.google.com/search?q=" & theSearchTerm & return) to saveFile
	repeat with n from 1 to (length of urlArray)
		set termEntry to item n of urlArray
		write (return & "Pages containing: " & ((item 1 of termEntry) as string) & return & return) to saveFile
		if (length of termEntry) > 1 then
			repeat with m from 2 to (length of termEntry)
				write (((item m of termEntry) as string) & return) to saveFile
			end repeat
		else
			write ("No pages" & return) to saveFile
		end if
	end repeat
	close access saveFile
on error errMsg number errNum
	-- Always release the file handle, then pass the original error on.
	try
		close access saveFile
	end try
	error errMsg number errNum
end try
--end save to file