applescript with html source

:D:D
i am dazzled.
i can find nothing wrong, no errors. :slight_smile: i’m happy

as far as the repeating, i plan to run this daily to update the calendar for maybe 3 months ahead (say 93 days). as it is, when i run the script twice for the same date, it duplicates the events for that date. running it 3 times gives 3 identical events, and so on. so if i run this script everyday to update the next 93 days…i think you see where I’m going ;). hopefully it can ignore already existing iCal events…

An “if” statement will take care of pre-existing events :slight_smile: I’ll do that later, but if you would edit and test this that would be great:

Near the end of the script above the iCal routine:

This should return only the info to the info field. I tested it on 3 pages without error.
SC

Wow. It works!


--Month constants in calendar order; a month's position in the list is its number
set monthList to {January, February, March, April, May, June, July, August, September, October, November, December}

--Day-view page of the events calendar; the query string for one day is appended to this
set URLBase to "http://intranet.risd.edu/ContentAdministrator/calendar/view/day.asp?"

--Ask for the first day to copy
set StartPrompt to "Enter a start date for calendar events" & return & "Format as 1/1/2005"
set BaseDate to text returned of (display dialog StartPrompt default answer "1/1/2005")

--Split the typed date on "/" and build the query string of the first page from its parts
set AppleScript's text item delimiters to "/"
set DateParts to text items of BaseDate
set URLMarkup to "month=" & (item 1 of DateParts) & "&day=" & (item 2 of DateParts) & "&year=" & (item 3 of DateParts)
set AppleScript's text item delimiters to ""

--From here on BaseDate is a date value rather than the typed text
set BaseDate to date BaseDate

--Ask for the last day to copy (kept as text; it is converted when it is compared)
set FinalDate to text returned of (display dialog "Ready to copy events. Enter the last date, format as  1/1/2005" default answer "1/1/2005")

--Main loop: one pass per calendar day, from BaseDate up to FinalDate.
--Each pass loads that day's page in Safari, reads the events off the page text,
--and adds every event that is not already present to the iCal calendar "Events Cal".
--Expects monthList, URLBase, URLMarkup, BaseDate (a date) and FinalDate (text) to be set already.
repeat
	set CurrentURL to URLBase & URLMarkup
	
	--Get URL and wait for page to load
	tell application "Safari"
		open location CurrentURL
		repeat
			set WinName to name of window 1
			if WinName does not contain "Loading" then exit repeat
		end repeat
	end tell
	
	delay 1 --Rest after page loads
	
	--Extract bold typed headings from and text of document
	tell application "Safari"
		set InitialEventList to (paragraphs of text of document 1 whose font is "Verdana-Bold")
		set PageContent to text of document 1
		set InfoParagraphs to attribute run of text of document 1
	end tell
	
	--Parse event list: keep every bold paragraph that is not just a linefeed
	set EventList to {}
	repeat with TheEvent in InitialEventList
		if (TheEvent as string) is not (ASCII character 10) then copy (TheEvent as string) to the end of EventList
	end repeat
	
	--Build event data list
	set TextBlocks to paragraphs of PageContent
	
	set NoLocation to "To be announced"
	set CurrentItem to 1
	--Order of List is Event name, Location, Date and start time, End time
	set CurrentEventData to {}
	--An empty string means "no location seen yet for the event being read"
	set theLocation to ""
	
	repeat with ThisBlock in TextBlocks
		--Remember the location; it comes before the date when it is present at all
		if ThisBlock contains "Location:" then set theLocation to (ThisBlock as string)
		
		--"Date" is the flag to start a new record since "Location" is not always present
		if (ThisBlock as string) contains "Date:" then
			--Drop any half-built record (a date that was never followed by a time)
			--so it cannot shift the fields of this event
			set CurrentEventData to {}
			--copy the Event name
			copy (item CurrentItem of EventList) to the end of CurrentEventData
			--copy location, or the placeholder when none was found before the date
			if theLocation is "" then
				copy NoLocation to the end of CurrentEventData
			else
				copy theLocation to the end of CurrentEventData
			end if
			--The location has been used; the next event must supply its own
			set theLocation to ""
			--Will grab next event with increment
			set CurrentItem to CurrentItem + 1
			--Create date string variable
			set theDateString to (ThisBlock as string)
		end if
		
		--Next is the start date/time, then the end time
		if ThisBlock contains "Time:" then
			set theDate to (date (theDateString & (ThisBlock as string)))
			copy theDate to the end of CurrentEventData
			
			--Split "start - end" on the dash
			set AppleScript's text item delimiters to "-"
			set StartTime to text item 1 of ThisBlock as string
			set EndTime to text item 2 of ThisBlock as string
			set AppleScript's text item delimiters to ""
			
			--The case of an event occurring past midnight: starts PM, ends AM
			set EndsNextDay to (StartTime contains "PM" and EndTime contains "AM")
			
			set EndTime to (date (theDateString & (EndTime)))
			--Date arithmetic rolls over month and year ends correctly,
			--which adding 1 to the day number inside the text does not
			if EndsNextDay then set EndTime to EndTime + 1 * days
			copy EndTime to the end of CurrentEventData
		end if
		
		--When the 4 properties fill the list, send them to ical for processing
		if (count items of CurrentEventData) is 4 then
			
			--Collect the descriptive text: everything from the paragraph holding
			--the event name up to (not including) the paragraph holding "Date:"
			set CurrentEvent to (item 1 of CurrentEventData) as string
			set CurrentInfo to {}
			set copyFlag to false
			
			repeat with ThisParagraph in InfoParagraphs
				
				if (ThisParagraph & (ASCII character 10)) contains CurrentEvent and (ThisParagraph as string) is not (ASCII character 10) then
					set copyFlag to true
				else if the copyFlag is true and ThisParagraph contains "Date:" then
					exit repeat
				end if
				
				if copyFlag is true then copy ThisParagraph as string to the end of CurrentInfo
			end repeat
			
			copy (CurrentInfo as string) to the end of CurrentEventData
			
			tell application "iCal"
				--Create calendar on first run
				if "Events Cal" is not in (title of calendars) then
					make new calendar at end of calendars with properties {title:"Events Cal"}
				end if
				set this_calendar to (the first calendar whose title is "Events Cal")
				
				--Routine that checks for pre-existing events. An event counts as already
				--present only when both its name and its start match, so an event with the
				--same name on another day is still added.
				set EventSummary to item 1 of CurrentEventData
				set EventStart to item 3 of CurrentEventData
				set AlreadyExists to false
				try
					repeat with ExistingEvent in (events of this_calendar whose start date is EventStart)
						if (summary of ExistingEvent) contains EventSummary then
							set AlreadyExists to true
							exit repeat
						end if
					end repeat
				end try
				
				--Add the event to the calendar. A duplicate only skips this one event;
				--the remaining events on the page are still processed.
				if not AlreadyExists then
					tell this_calendar to set this_event to make new event at the end of events with properties ¬
						{summary:(item 1 of CurrentEventData), location:(item 2 of CurrentEventData), start date:(item 3 of CurrentEventData), end date:(item 4 of CurrentEventData), description:(item 5 of CurrentEventData)}
				end if
			end tell
			set CurrentEventData to {}
		end if
		
	end repeat
	
	delay 1 --rest again
	
	tell application "Safari" to close window 1
	
	--Stop at the final date; ">=" also stops the loop when the final date
	--typed in is earlier than the start date
	if BaseDate >= date FinalDate then
		display dialog "Final date " & FinalDate & " has been reached"
		exit repeat
	end if
	
	set BaseDate to (BaseDate) + 24 * hours
	
	--Find the number of the new date's month from its position in monthList
	repeat with i from 1 to 12
		if BaseDate's month = item i of monthList then
			set theMonth to i as number
			exit repeat
		end if
	end repeat
	
	set URLMarkup to "month=" & theMonth & "&day=" & day of BaseDate & "&year=" & year of BaseDate
end repeat

The only thing I think is left is that the times are sometimes “open” or “00:00”, both of which get interpreted as 12:00 AM to 12:00 PM. Also, since an event is skipped if it already exists, if that event gets updated on the webpage you won’t get the update. You could check to see if the info is the same, but the script runs through the dates so fast it’s almost the same to just delete the old events and rebuild the calendar. What do you think?
SC

Works like a charm :slight_smile:
I’m about to head out of town for a long weekend (the next 3 or 4 days), so I don’t think I’ll be able to post. Talk to you when I get back.
:cool:

i’m back! :smiley:
that works great! i noticed the 12:00-12:00 thing earlier, i don’t think that’s a big deal. and that’s a good thought about just rebuilding the calendar every time–I’ll just delete the old one and recreate it.
i can’t thank you enough sitcom (and the others who helped me earlier)!
i’m a graphic design major, so if you ever need something like a logo, etc. for a project of yours, let me know!:slight_smile:
i’m going to publish the calendar on icalx.com soon, so i’ll post the link when i’ve got it (in case anyone’s interested :rolleyes:)

…uh oh, after a little more testing, i’m noticing that the script seems to skip certain events every time. at first i thought it was stopping after the first 3 on one day (sept 12, with eight events, only had the first three included in the iCal calendar) but then I noticed there were some days with events listed that ended up with nothing at all in iCal (this seemed to be a problem with Nov 10th, among others). Other ones were skipped as well, for no apparent reason. In the end, I can’t find a single pattern to the ones it’s skipping, although it seems to skip the same ones every time.
There is one thing I think is causing it to skip certain ones:
Multiple events with the same start and end time (script creates only the first event from the page). see Nov 9th (http://intranet.risd.edu/ContentAdministrator/calendar/view/day.asp?month=11&day=9&year=2005)
Other than that, I have no idea…
Examples (as listed above):
Nov 10th—no events are created(http://intranet.risd.edu/ContentAdministrator/calendar/view/day.asp?month=11&day=10&year=2005)
Sept 12th—first three events are created (http://intranet.risd.edu/ContentAdministrator/calendar/view/day.asp?month=9&day=12&year=2005. This date has so many typos and other strange cases on the website. maybe certain ones are being skipped because they have the same start time or end time (or they end up with the same time in iCal when it converts incorrectly entered times all to 12:00am - 12:00pm…also notice on that page the “OM” instead of “PM” which the script doesn’t know what to do with, ending up with Convocation lasting from 1:00am till 1:00am–a problem that I think is impossible and pointless to attempt solving). some might be skipped because they have the same title (ical “summary” field), but i think that’s ok (you don’t really need the same event listed twice, as in “Convocation” on Sept 12th).

anyway, it seems very strange to me. the only thing i can really think of to change in the script is to make sure it doesn’t skip events if they have any of the same start or end times.

Sorry to be a bit slow with my timing, but how does this fare? It is a continuation of my own script from before and simply deletes every event in the calendar when starting.

There is a bug in this forum! The html for single quotation marks &#39 ; [ignore the space] is automatically changed to a plain text ’ in the editable source of my post after previewing.

-- Rebuilds the iCal calendar "School Events" from the school's day-view web pages.
-- The calendar is emptied (or created) first, then each day from the start date to
-- the end date is fetched with curl and every event found on the page is added.
-- Relies on the do_this handler defined in this script to cut fields out of the HTML.
set old_delimiters to text item delimiters

tell application "iCal"
	if "School Events" is not in title of calendars then
		set this_calendar to make new calendar at end of calendars with properties {title:"School Events"}
	else
		-- Start from an empty calendar so a re-run never duplicates events
		set this_calendar to first calendar whose title is "School Events"
		delete every event of this_calendar
	end if
end tell

-- Going through "short date string" strips any time of day from what the user typed
set start_date to date (short date string of date (text returned of (display dialog "Enter the start date:" default answer short date string of (current date))))
set end_date to date (short date string of date (text returned of (display dialog "Enter the end date:" default answer short date string of date ("December 31 " & year of (current date)))))

set this_date to start_date
repeat (end_date - start_date) div days + 1 times
	tell this_date to set the_url to "http://intranet.risd.edu/ContentAdministrator/calendar/view/day.asp?month=" & (month of it as number) & "&day=" & day of it & "&year=" & year of it
	set source_text to do shell script "curl " & quoted form of the_url
	
	-- Event titles are wrapped in <b>; a page without one is treated as having no events
	if "<b>" is in source_text then
		-- Clean the raw HTML: non-breaking-space entities become spaces, apostrophe
		-- entities become apostrophes, and carriage returns are removed. The entities
		-- are spelled out here because curl returns them undecoded.
		repeat with this_delimiter in {{"&nbsp;", space}, {"&#39;", "'"}, {ASCII character 13, ""}}
			set text item delimiters to item 1 of this_delimiter
			set source_text to text items of source_text
			set text item delimiters to item 2 of this_delimiter
			set source_text to source_text as string
		end repeat
		-- One chunk per event: everything after each <b>
		set text item delimiters to "<b>"
		set source_text to rest of text items of source_text
		repeat with this_text in source_text
			set text item delimiters to "</b>"
			set summary_property to text item 1 of this_text
			set description_property to do_this(this_text, "<font face=\"verdana\" size=\"2\">", "</font>")
			try
				set location_property to do_this(this_text, "Location:  </span>", "<br>")
			on error
				-- do_this fails when the page has no location line for this event
				set location_property to "Not specified"
			end try
			set date_property to do_this(this_text, "Date:  </span>", "<br>")
			set time_property to do_this(this_text, "Time:  </span>", "<br>")
			-- The time reads "start - end"
			set text item delimiters to " - "
			set start_date_property to date (date_property & space & text item 1 of time_property)
			set end_date_property to date (date_property & space & text item 2 of time_property)
			-- An end before the start means the event runs past midnight into the next day
			if end_date_property < start_date_property then set end_date_property to end_date_property + days
			tell application "iCal"
				tell this_calendar to set this_event to make new event at end of events with properties {summary:summary_property, description:description_property, location:location_property, start date:start_date_property, end date:end_date_property}
			end tell
		end repeat
	end if
	set this_date to this_date + days
end repeat
set text item delimiters to old_delimiters

-- Returns the part of temp_text that lies between the first start_delimiter and the
-- next end_delimiter after it.
-- Raises an error when start_delimiter does not occur in temp_text (there is no
-- second text item); callers use that to detect a missing field.
-- Leaves text item delimiters set to end_delimiter.
on do_this(temp_text, start_delimiter, end_delimiter)
	-- Everything after the first start delimiter, up to the next one
	set text item delimiters to start_delimiter
	set after_start to text item 2 of temp_text
	-- Cut that off at the first end delimiter
	set text item delimiters to end_delimiter
	return text item 1 of after_start
end do_this

:smiley:
i have to say, that works extremely well (and extremely fast compared to telling safari to get all the text)!
thanks qwerty!!! i may just have to use this!:slight_smile:

Whew! I’m glad another approach was presented. I think the path I was taking was leaving behind too many “possibles”. My comment is that no matter what approach you take, you are at the mercy of the web master. One change of format will cause you to have to adjust or rewrite the script whichever approach you take. This script will probably have to be modified from time to time, especially if the web designer decides to “go for a new look” in the years to come. Not to worry, so long as this forum is around. :wink:
SC

:slight_smile:
Yeah, hopefully that won’t happen anytime soon :D!