xotcl.wu-wien.ac.at
Begin main content
Search · Index
No registered users in community xowiki
in last 10 minutes

load-timezones.tcl

The program was written by Pedro Liska and has a few fixes and improvements by Gustaf Neumann. These are:

  • regexp matching for time zone names like GMT+5
  • regexp matching for rule entries in the linux timezone database containing no abbreviation (such as for Iran and Israel)
  • braced expr expressions, so that Tcl can byte-compile them
Without further configuration, the script generates the timezone data with the .dat-new suffix in ref-timezones/sql/common/.
ad_page_contract {
    
    This page loads the linux time zones and time zone rules into
    $timezones_table and $timezone_rules_table

    Some fixes added by Gustaf Neumann

    @author pedroliska.com
    @creation-date 2007-04-13
    @cvs-id $Id: load-timezones.tcl,v 1.3 2007/04/27 23:05:01 pliska Exp $
}

#################################################################
# PLEASE SET THE FOLLOWING PARAMETERS:

# action can be one of: dat_file, db_insert
# - dat_file will generate dat files that can be loaded by the
#   ref-timezones *.ctl scripts. See NOTE below.
# - db_insert will delete the current content of the two tables
#   named below and insert the timezones directly into the DB
set action "dat_file"

# if action == dat_file, this param is needed.  The timezones.dat-new
# and timezone-rules.dat-new files will be placed in that directory
set dat_file_directory [acs_root_dir]/packages/ref-timezones/sql/common

# if action == db_insert, these params are needed
set timezones_table timezones
set timezone_rules_table timezone_rules

# set to 1 to also print every processed pair of rule lines and the
# fields extracted from them
set debug_p 0

# NOTE:
# My goal was to get the current *.ctl files in the ref-timezones
# package to be able to load my .dat files, but I ran into a problem
# with generating the timezone-rules.dat file. I was not able to get
# Tcl's clock procs to format the dates in "Mon DD YYYY HH:MI:SS"
# format. So you have to modify the .ctl file of timezone-rules.dat
# in order to load it. The dates are written in the following format:
# "Dy Mon DD HH24:MI:SS YYYY".

#################################################################

# You should not need to modify anything below this line

# Returns the list of time zone names available on this machine.
#
# The names are the paths of all regular files below /usr/share/zoneinfo/,
# given relative to that directory (e.g. "Europe/Vienna"). Paths containing
# "/right/" or "/posix" are filtered out by the grep calls.
#
# Raises a Tcl error when the find/grep pipeline fails.
proc get_tz_names {} {
    set timezone_list [list]
    set line_list [split [exec -- find /usr/share/zoneinfo/ -type f -print | grep -v /right/ | grep -v /posix] \n]
    foreach line $line_list {
        # Only accept lines that really are paths below the zoneinfo
        # directory. Without checking the regexp result, a non-matching
        # line would append the name of the previous zone a second time
        # (or raise an error on the first line, where $timezone is unset).
        if {[regexp {^/usr/share/zoneinfo/(.*)$} $line match timezone]} {
            lappend timezone_list $timezone
        }
    }
    return $timezone_list
}

# Runs "zdump -v" on the zoneinfo file of tz_name and returns the
# output as a Tcl list with one element per output line.
proc get_linux_tz_rules {tz_name} {
    set zone_file "/usr/share/zoneinfo/$tz_name"
    set dump [exec -- zdump -v $zone_file]
    return [split $dump "\n"]
}

# Returns a Tcl list of Linux timezone rules for tz_name, where each rule
# is a Tcl list containing:
# - The UTC time
# - The Local Time
# - The time zone abbreviation (tz_name itself when the line has none)
# - Is it daylight savings time (0,1)
# - The GMT offset
#
# When the rules cannot be obtained, a notice is logged and an empty list
# is returned. Lines that match neither expected layout are reported on
# the page via ns_write and skipped.
proc get_tz_rules {tz_name} {
    set tz_rules [list]
    with_catch errmsg {
        # On a couple of rules, I got a warning which made the proc break. The warnings were all
        # like this: zdump: warning: zone "/usr/share/zoneinfo/iso3166.tab" abbreviation "/usr/share/zoneinfo/iso" differs from POSIX standard
        set linux_rules [get_linux_tz_rules $tz_name]
    } {
        ns_log notice "get_tz_rules: Rules ignored for $tz_name due to the following error: $errmsg"
        set linux_rules ""
    }

    # The zone name becomes part of the regular expressions below, so every
    # non-word character in it (e.g. the "+" in GMT+5 or the "." in
    # iso3166.tab) is backslash-escaped to be matched literally. The
    # patterns do not depend on the line, so they are built once here
    # rather than once per line.
    set re_tz_name [regsub -all {\W} $tz_name {\\&}]

    # Sometimes the abbrev has strange values like 'Local time zone must be set--see zic manual page',
    # so I can't assume it's only one word. This is why I have to rely on the local datetime to be
    # 24 chars in length (which is quite safe to assume)
    set re_prefix "^/usr/share/zoneinfo/$re_tz_name +(.+) UTC = (.{24})"
    set re_with_abbrev "$re_prefix (.+) isdst=(.+) gmtoff=(.+)\$"
    set re_without_abbrev "$re_prefix isdst=(.+) gmtoff=(.+)\$"

    foreach line $linux_rules {
        if {[regexp $re_with_abbrev $line _ \
                 utc_datetime local_datetime abbrev isdst gmtoff]} {
            lappend tz_rules [list $utc_datetime $local_datetime $abbrev $isdst $gmtoff]
        } elseif {[regexp $re_without_abbrev $line _ \
                       utc_datetime local_datetime isdst gmtoff]} {
            # we have no abbreviation, happens with tz_name like Iran or Israel;
            # use the zone name in its place
            lappend tz_rules [list $utc_datetime $local_datetime $tz_name $isdst $gmtoff]
        } else {
            ns_write "no match in line '$line', tz='$tz_name'\n"
        }
    }
    return $tz_rules
}
# Converts an offset given in seconds into a signed "hhmiss" string:
# a leading "+" or "-" followed by two-digit (zero padded) hours,
# minutes and seconds, e.g. 3600 -> +010000 and -18000 -> -050000.
proc secs_to_hhmiss {secs} {
    set sign [expr {$secs < 0 ? "-" : "+"}]
    set magnitude [expr {abs($secs)}]
    set hh [expr {$magnitude / 3600}]
    # seconds left over once the full hours are taken out
    set remainder [expr {$magnitude % 3600}]
    set mi [expr {$remainder / 60}]
    set ss [expr {$remainder % 60}]
    return [format "%s%02d%02d%02d" $sign $hh $mi $ss]
}
#################################################################

# Start the HTML response; all progress messages are written inside a
# <pre> block.
ReturnHeaders text/html
ns_write "<html><body><pre>\n"

# Prepare the output target.
# action can be one of: dat_file, db_insert
# "--" keeps a value of $action that starts with "-" from being taken
# as a switch option.
switch -exact -- $action {
    dat_file {
        set tz_filename "$dat_file_directory/timezones.dat-new"
        set tz_rules_filename "$dat_file_directory/timezone-rules.dat-new"
        set tz_file [open $tz_filename w]
        if {[catch {open $tz_rules_filename w} tz_rules_file]} {
            # Do not leak the first channel when the second file cannot
            # be opened; re-raise the original error afterwards.
            set saved_info $::errorInfo
            set saved_code $::errorCode
            close $tz_file
            error $tz_rules_file $saved_info $saved_code
        }
        ns_write "Inserting timezone and timezone_rules into the following files:\n"
        ns_write "  $tz_filename\n"
        ns_write "  $tz_rules_filename\n\n"
    }
    db_insert {
        # Both tables are emptied before the new data is inserted.
        ns_write "Deleting ALL from the $timezones_table table\n"
        db_dml "delete timezones" "delete from $timezones_table"
        ns_write "Deleting ALL from the $timezone_rules_table table\n"
        db_dml "delete timezone rules" "delete from $timezone_rules_table"
        ns_write "Inserting timezone and timezone_rules into the DB\n"
    }
    default {
        error "action '$action' is not supported"
    }
}



# Main loop: for every time zone name, read its rules and write the time
# zone plus its rule date ranges to the target selected by $action.
# tz_id counts the time zones actually written and serves as their id.
set tz_id 0
foreach tz_name [get_tz_names] {

    # Get the lines of that rule
    set rules [get_tz_rules $tz_name]
    set rule_count [llength $rules]

    if {$rule_count <= 0} {
        # If there are no rules, let's not insert that timezone in the DB.
        # There were several GMT+N timezones that had no rules
        continue
    }

    # There are some rules, like the ones for the iso3166.tab timezone, that
    # have weird abbreviations. I'll just ignore those
    set weird_abbrev_found 0
    foreach rule $rules {
        if {[string length [lindex $rule 2]] > 10} {
            set weird_abbrev_found 1
            break
        }
    }
    if {$weird_abbrev_found} {
        continue
    }

    # Get the gmt_offset (hhmiss) from the last rule line where isdst is
    # not 1. The search is bounded by index 0: when every rule line has
    # isdst=1 there is no standard offset to take, and running past the
    # start of the list would yield an empty offset that secs_to_hhmiss
    # cannot convert. Such a time zone is skipped.
    set last_non_dst_line -1
    for {set idx [expr {$rule_count - 1}]} {$idx >= 0} {incr idx -1} {
        if {[lindex [lindex $rules $idx] 3] != 1} {
            set last_non_dst_line $idx
            break
        }
    }
    if {$last_non_dst_line < 0} {
        ns_write "  Skipping time zone $tz_name: no rule with isdst=0 found\n"
        continue
    }

    set gmt_offset_in_seconds [lindex [lindex $rules $last_non_dst_line] 4]
    # the linux gmtoff values are in seconds so convert it to hhmiss for the DB
    set gmt_offset_for_db [secs_to_hhmiss $gmt_offset_in_seconds]

    incr tz_id

    ns_write "  $tz_id: Time zone: $tz_name. Linux rule count: $rule_count\n"

    # action can be one of: dat_file, db_insert
    switch -exact -- $action {
        dat_file {
            puts $tz_file "$tz_id,$tz_name,$gmt_offset_for_db"
        }
        db_insert {
            db_dml "insert the timezone" "
		insert into $timezones_table
		(tz_id, tz,      gmt_offset)
		values
		(:tz_id,:tz_name,:gmt_offset_for_db)"
        }
    }

    # ignore the first and last lines, they just specify the lowest and highest
    # possible time values

    # All the date ranges begin in odd-indexed lines
    # the date ranges end in the date/time specified by the subsequent line
    # This algorithm automatically ignores the first line (index = 0) but we
    # also have to ignore the last line.

    for {set i 1} {$i < $rule_count - 1} {incr i 2} {

        set current_rule [lindex $rules $i]
        set next_rule [lindex $rules [expr {$i + 1}]]

        if {$debug_p} {
            ns_write "    $current_rule\n"
            ns_write "    $next_rule\n"
        }

        # Start values come from the current line, end values from the
        # line that follows it.
        set utc_start [lindex $current_rule 0]
        set utc_end [lindex $next_rule 0]
        set local_start [lindex $current_rule 1]
        set local_end [lindex $next_rule 1]
        set abbrev [lindex $current_rule 2]
        # map the 0/1 flag to f/t
        set isdst [ad_decode [lindex $current_rule 3] 1 t f]
        set gmt_offset [lindex $current_rule 4]

        if {$debug_p} {
            ns_write "        $utc_start\n"
            ns_write "        $utc_end\n"
            ns_write "        $local_start\n"
            ns_write "        $local_end\n"
            ns_write "        $abbrev\n"
            ns_write "        $isdst\n"
            ns_write "        $gmt_offset\n"
        }

        # action can be one of: dat_file, db_insert
        switch -exact -- $action {
            dat_file {

                # Tcl is not able to format the beginning (1901) and ending (2038)
                # date ranges. It's probably an off-by-one problem. So I decided to
                # modify the format strings in the .ctl files instead of using
                # the following code:
                # 		set datetime_format "%h %d %Y %H:%M:%S"
                # 		with_catch errmsg {
                # 		    set utc_start [clock format [clock scan $utc_start -gmt 1] -format $datetime_format -gmt 1]
                # 		    set utc_end [clock format [clock scan $utc_end -gmt 1] -format $datetime_format -gmt 1]
                # 		    set local_start [clock format [clock scan $local_start -gmt 1] -format $datetime_format -gmt 1]
                # 		    set local_end [clock format [clock scan $local_end -gmt 1] -format $datetime_format -gmt 1]
                # 		} {
                # 		    # One of the dates could not be formatted by Tcl. This usually happens when the
                # 		    # date is too far in the past or in the future. Since these rules are at the ends
                # 		    # of the supported date ranges, ignoring the rule is safe.
                # 		    if {$debug_p} {
                # 			ns_write "    ERROR: $errmsg\n"
                # 		    }
                # 		    continue
                # 		}

                puts $tz_rules_file "$tz_id,$abbrev,$utc_start,$utc_end,$local_start,$local_end,$gmt_offset,$isdst"
            }
            db_insert {
                db_dml "insert the timezone rules" "
		    insert into $timezone_rules_table (
		      tz_id, 
		      abbrev,
		      utc_start,
		      utc_end,
		      local_start,
		      local_end,
		      gmt_offset,
		      isdst
		    ) values (
		      :tz_id,
		      :abbrev,
		      to_date(:utc_start,'Dy Mon DD HH24:MI:SS YYYY'),
		      to_date(:utc_end,'Dy Mon DD HH24:MI:SS YYYY'),
		      to_date(:local_start,'Dy Mon DD HH24:MI:SS YYYY'),
		      to_date(:local_end,'Dy Mon DD HH24:MI:SS YYYY'),
		      :gmt_offset,
		      :isdst)"
            }
        }
    }
}
# Wrap up: in dat_file mode close both output files, then report how
# many time zones were processed and end the HTML page.
# action can be one of: dat_file, db_insert
if {$action eq "dat_file"} {
    close $tz_file
    close $tz_rules_file
    ns_write "Timezones written to files: $tz_id\n"
} elseif {$action eq "db_insert"} {
    ns_write "Time zones inserted in the DB: $tz_id\n"
}

ns_write "</pre></body></html>\n"