diff options
author | Todd C. Miller <millert@cvs.openbsd.org> | 2023-11-17 21:51:38 +0000 |
---|---|---|
committer | Todd C. Miller <millert@cvs.openbsd.org> | 2023-11-17 21:51:38 +0000 |
commit | b52ef81a8c9831bde0d0f061878dfa12c8f88809 (patch) | |
tree | 14c3d97c18dbd41d99d4e2db88546a1cfdf424a7 | |
parent | 46df9b7b518ae9c6725d6e862734184f122f8d3b (diff) |
zoneinfo: install tzdata.zi and leap-seconds.list
Build and install the tzdata.zi file and build the leapseconds file
from leap-seconds.list (installing both versions). Third-party
software now expects these files to be installed. OK sthen@ deraadt@
-rw-r--r-- | share/zoneinfo/Makefile | 133 | ||||
-rw-r--r-- | share/zoneinfo/datfiles/leap-seconds.list | 256 | ||||
-rw-r--r-- | share/zoneinfo/datfiles/leapseconds | 83 | ||||
-rwxr-xr-x | share/zoneinfo/leapseconds.awk | 252 | ||||
-rw-r--r-- | share/zoneinfo/version | 1 | ||||
-rw-r--r-- | share/zoneinfo/ziguard.awk | 386 | ||||
-rw-r--r-- | share/zoneinfo/zishrink.awk | 356 |
7 files changed, 1353 insertions, 114 deletions
diff --git a/share/zoneinfo/Makefile b/share/zoneinfo/Makefile index c18c2b08931..c3643725442 100644 --- a/share/zoneinfo/Makefile +++ b/share/zoneinfo/Makefile @@ -1,32 +1,49 @@ -# $OpenBSD: Makefile,v 1.15 2020/10/07 22:33:31 millert Exp $ -# $NetBSD: Makefile,v 1.14 1995/04/22 12:10:17 cgd Exp $ - -# Change the line below for your time zone (after finding the zone you want in -# the time zone files, or adding it to a time zone file). -# Alternately, if you discover you've got the wrong time zone, you can just -# zic -l rightzone - -# This line has been moved to /usr/src/etc/Makefile -LOCALTIME= US/Pacific - -# If you want something other than Eastern United States time as a template -# for handling POSIX-style time zone environment variables, -# change the line below (after finding the zone you want in the -# time zone files, or adding it to a time zone file). -# Alternately, if you discover you've got the wrong time zone, you can just -# zic -p rightzone - +# $OpenBSD: Makefile,v 1.16 2023/11/17 21:51:37 millert Exp $ + +# DATAFORM selects the data format. OpenBSD always uses "main" +# Available formats represent essentially the same data, albeit +# possibly with minor discrepancies that users are not likely to notice. +# To get new features and the best data right away, use: +# DATAFORM= vanguard +# To wait a while before using new features, to give downstream users +# time to upgrade zic (the default), use: +# DATAFORM= main +# To wait even longer for new features, use: +# DATAFORM= rearguard +# Rearguard users might also want "ZFLAGS = -b fat"; see below. +DATAFORM= main + +# The POSIXRULES macro controls interpretation of POSIX-like TZ +# settings like TZ='EET-2EEST' that lack DST transition rules. +# If POSIXRULES is '-', no template is installed; this is the default. +# Any other value for POSIXRULES is obsolete and should not be relied on, as: +# * It does not work correctly in popular implementations such as GNU/Linux. 
+# * It does not work even in tzcode, except for historical timestamps +# that precede the last explicit transition in the POSIXRULES file. +# Hence it typically does not work for current and future timestamps. +# If, despite the above, you want a template for handling these settings, +# you can change the line below (after finding the timezone you want in +# one of the $(TDATA) source files, or adding it to a source file). +# Alternatively, if you discover you've got the wrong timezone, you can just +# 'zic -p -' to remove it, or 'zic -p rightzone' to change it. +# Use the command +# make zonenames +# to get a list of the values you can use for POSIXRULES. POSIXRULES= US/Pacific -# Use an absolute path name for TZDIR unless you're just testing the software. - +# "Compiled" timezone information is placed in the "TZDIR" directory +# (and subdirectories). +# TZDIR_BASENAME should not contain "/" and should not be ".", ".." or empty. +TZDIR_BASENAME= zoneinfo TZDIR= ${DESTDIR}/usr/share/zoneinfo -# If you always want time values interpreted as "seconds since the epoch -# (not counting leap seconds)", use -# REDO= posix_only -# below. If you always want right time values interpreted as "seconds since -# the epoch" (counting leap seconds)", use +# What kind of TZif data files to generate. (TZif is the binary time +# zone data format that zic generates; see Internet RFC 8536.) +# If you want only POSIX time, with time values interpreted as +# seconds since the epoch (not counting leap seconds), use +# REDO= posix_only +# below. If you want only "right" time, with values interpreted +# as seconds since the epoch (counting leap seconds), use # REDO= right_only # below. If you want both sets of data available, with leap seconds not # counted normally, use @@ -34,20 +51,66 @@ TZDIR= ${DESTDIR}/usr/share/zoneinfo # below. If you want both sets of data available, with leap seconds counted # normally, use # REDO= right_posix -# below. - +# below. 
POSIX mandates that leap seconds not be counted; for compatibility +# with it, use "posix_only" or "posix_right". Use POSIX time on systems with +# leap smearing; this can work better than unsmeared "right" time with +# applications that are not leap second aware, and is closer to unsmeared +# "right" time than unsmeared POSIX time is (e.g., 0.5 vs 1.0 s max error). REDO= posix_only +# Whether to put an "Expires" line in the leapseconds file. +# Use EXPIRES_LINE=1 to put the line in, 0 to omit it. +# The EXPIRES_LINE value matters only if REDO's value contains "right". +# If you change EXPIRES_LINE, remove the leapseconds file before running "make". +# zic's support for the Expires line was introduced in tzdb 2020a, +# and was modified in tzdb 2021b to generate version 4 TZif files. +# EXPIRES_LINE defaults to 0 for now so that the leapseconds file +# can be given to pre-2020a zic implementations and so that TZif files +# built by newer zic implementations can be read by pre-2021b libraries. 
+EXPIRES_LINE= 0 + +# To install data in text form that has all the information of the TZif data, +# (optionally incorporating leap second information), use +# TZDATA_TEXT= tzdata.zi leapseconds +# To install text data without leap second information (e.g., because +# REDO='posix_only'), use +# TZDATA_TEXT= tzdata.zi +# To avoid installing text data, use +# TZDATA_TEXT= +TZDATA_TEXT= leapseconds tzdata.zi + TDATA= africa antarctica asia australasia \ europe northamerica southamerica etcetera factory \ backward -TABDATA= iso3166.tab zone.tab zone1970.tab -DATA= $(TDATA) $(TABDATA) leapseconds -USNO= usno1988 usno1989 usno1989a usno1995 usno1997 +TABDATA= iso3166.tab zone.tab zone1970.tab $(TZDATA_TEXT) +DATA= $(TDATA) $(TABDATA) +DSTDATA_ZI_DEPS= ziguard.awk $(TDATA) ZIC= zic -all: +all: leapseconds tzdata.zi + +$(DATAFORM).zi: $(DSTDATA_ZI_DEPS) + (cd ${.CURDIR}/datfiles && \ + awk -v DATAFORM=`expr $@ : '\(.*\).zi'` -f ../ziguard.awk \ + $(TDATA) >${.OBJDIR}/$@.out) + mv ${.OBJDIR}/$@.out ${.OBJDIR}/$@ + +tzdata.zi: $(DATAFORM).zi version zishrink.awk + (cd ${.CURDIR}/datfiles && version=`sed 1q ../version` && \ + LC_ALL=C awk \ + -v dataform='$(DATAFORM)' \ + -v deps='$(DSTDATA_ZI_DEPS) zishrink.awk' \ + -v redo='$(REDO)' \ + -v version="$$version" \ + -f ../zishrink.awk \ + ${.OBJDIR}/$(DATAFORM).zi >${.OBJDIR}/$@.out) + mv ${.OBJDIR}/$@.out ${.OBJDIR}/$@ + +leapseconds: leapseconds.awk datfiles/leap-seconds.list + awk -v EXPIRES_LINE=$(EXPIRES_LINE) -f ${.CURDIR}/leapseconds.awk \ + ${.CURDIR}/datfiles/leap-seconds.list >${.OBJDIR}/$@.out + mv ${.OBJDIR}/$@.out ${.OBJDIR}/$@ posix_only: ${TDATA} (cd ${.CURDIR}/datfiles; \ @@ -76,11 +139,19 @@ realinstall: ${DATA} ${REDO} -type d -exec chmod a=rx,u+w {} + ${INSTALL} -c -o root -g bin -m 644 ${.CURDIR}/datfiles/iso3166.tab \ ${DESTDIR}/usr/share/misc + ${INSTALL} -c -o root -g bin -m 644 leapseconds \ + ${DESTDIR}/usr/share/zoneinfo + ${INSTALL} -c -o root -g bin -m 644 ${.CURDIR}/datfiles/leap-seconds.list \ + 
${DESTDIR}/usr/share/zoneinfo + ${INSTALL} -c -o root -g bin -m 644 tzdata.zi \ + ${DESTDIR}/usr/share/zoneinfo ${INSTALL} -c -o root -g bin -m 644 ${.CURDIR}/datfiles/zone.tab \ ${DESTDIR}/usr/share/zoneinfo ${INSTALL} -c -o root -g bin -m 644 ${.CURDIR}/datfiles/zone1970.tab \ ${DESTDIR}/usr/share/zoneinfo +clean: + rm -f leapseconds *.zi .PATH: ${.CURDIR}/datfiles .include <bsd.prog.mk> diff --git a/share/zoneinfo/datfiles/leap-seconds.list b/share/zoneinfo/datfiles/leap-seconds.list new file mode 100644 index 00000000000..c250346559b --- /dev/null +++ b/share/zoneinfo/datfiles/leap-seconds.list @@ -0,0 +1,256 @@ +# $OpenBSD: leap-seconds.list,v 1.1 2023/11/17 21:51:37 millert Exp $ +# +# In the following text, the symbol '#' introduces +# a comment, which continues from that symbol until +# the end of the line. A plain comment line has a +# whitespace character following the comment indicator. +# There are also special comment lines defined below. +# A special comment will always have a non-whitespace +# character in column 2. +# +# A blank line should be ignored. +# +# The following table shows the corrections that must +# be applied to compute International Atomic Time (TAI) +# from the Coordinated Universal Time (UTC) values that +# are transmitted by almost all time services. +# +# The first column shows an epoch as a number of seconds +# since 1 January 1900, 00:00:00 (1900.0 is also used to +# indicate the same epoch.) Both of these time stamp formats +# ignore the complexities of the time scales that were +# used before the current definition of UTC at the start +# of 1972. (See note 3 below.) +# The second column shows the number of seconds that +# must be added to UTC to compute TAI for any timestamp +# at or after that epoch. The value on each line is +# valid from the indicated initial instant until the +# epoch given on the next one or indefinitely into the +# future if there is no next line. 
+# (The comment on each line shows the representation of +# the corresponding initial epoch in the usual +# day-month-year format. The epoch always begins at +# 00:00:00 UTC on the indicated day. See Note 5 below.) +# +# Important notes: +# +# 1. Coordinated Universal Time (UTC) is often referred to +# as Greenwich Mean Time (GMT). The GMT time scale is no +# longer used, and the use of GMT to designate UTC is +# discouraged. +# +# 2. The UTC time scale is realized by many national +# laboratories and timing centers. Each laboratory +# identifies its realization with its name: Thus +# UTC(NIST), UTC(USNO), etc. The differences among +# these different realizations are typically on the +# order of a few nanoseconds (i.e., 0.000 000 00x s) +# and can be ignored for many purposes. These differences +# are tabulated in Circular T, which is published monthly +# by the International Bureau of Weights and Measures +# (BIPM). See www.bipm.org for more information. +# +# 3. The current definition of the relationship between UTC +# and TAI dates from 1 January 1972. A number of different +# time scales were in use before that epoch, and it can be +# quite difficult to compute precise timestamps and time +# intervals in those "prehistoric" days. For more information, +# consult: +# +# The Explanatory Supplement to the Astronomical +# Ephemeris. +# or +# Terry Quinn, "The BIPM and the Accurate Measurement +# of Time," Proc. of the IEEE, Vol. 79, pp. 894-905, +# July, 1991. <http://dx.doi.org/10.1109/5.84965> +# reprinted in: +# Christine Hackman and Donald B Sullivan (eds.) +# Time and Frequency Measurement +# American Association of Physics Teachers (1996) +# <http://tf.nist.gov/general/pdf/1168.pdf>, pp. 75-86 +# +# 4. The decision to insert a leap second into UTC is currently +# the responsibility of the International Earth Rotation and +# Reference Systems Service. 
(The name was changed from the +# International Earth Rotation Service, but the acronym IERS +# is still used.) +# +# Leap seconds are announced by the IERS in its Bulletin C. +# +# See www.iers.org for more details. +# +# Every national laboratory and timing center uses the +# data from the BIPM and the IERS to construct UTC(lab), +# their local realization of UTC. +# +# Although the definition also includes the possibility +# of dropping seconds ("negative" leap seconds), this has +# never been done and is unlikely to be necessary in the +# foreseeable future. +# +# 5. If your system keeps time as the number of seconds since +# some epoch (e.g., NTP timestamps), then the algorithm for +# assigning a UTC time stamp to an event that happens during a positive +# leap second is not well defined. The official name of that leap +# second is 23:59:60, but there is no way of representing that time +# in these systems. +# Many systems of this type effectively stop the system clock for +# one second during the leap second and use a time that is equivalent +# to 23:59:59 UTC twice. For these systems, the corresponding TAI +# timestamp would be obtained by advancing to the next entry in the +# following table when the time equivalent to 23:59:59 UTC +# is used for the second time. Thus the leap second which +# occurred on 30 June 1972 at 23:59:59 UTC would have TAI +# timestamps computed as follows: +# +# ... +# 30 June 1972 23:59:59 (2287785599, first time): TAI= UTC + 10 seconds +# 30 June 1972 23:59:60 (2287785599,second time): TAI= UTC + 11 seconds +# 1 July 1972 00:00:00 (2287785600) TAI= UTC + 11 seconds +# ... +# +# If your system realizes the leap second by repeating 00:00:00 UTC twice +# (this is possible but not usual), then the advance to the next entry +# in the table must occur the second time that a time equivalent to +# 00:00:00 UTC is used. Thus, using the same example as above: +# +# ... 
+# 30 June 1972 23:59:59 (2287785599): TAI= UTC + 10 seconds +# 30 June 1972 23:59:60 (2287785600, first time): TAI= UTC + 10 seconds +# 1 July 1972 00:00:00 (2287785600,second time): TAI= UTC + 11 seconds +# ... +# +# in both cases the use of timestamps based on TAI produces a smooth +# time scale with no discontinuity in the time interval. However, +# although the long-term behavior of the time scale is correct in both +# methods, the second method is technically not correct because it adds +# the extra second to the wrong day. +# +# This complexity would not be needed for negative leap seconds (if they +# are ever used). The UTC time would skip 23:59:59 and advance from +# 23:59:58 to 00:00:00 in that case. The TAI offset would decrease by +# 1 second at the same instant. This is a much easier situation to deal +# with, since the difficulty of unambiguously representing the epoch +# during the leap second does not arise. +# +# Some systems implement leap seconds by amortizing the leap second +# over the last few minutes of the day. The frequency of the local +# clock is decreased (or increased) to realize the positive (or +# negative) leap second. This method removes the time step described +# above. Although the long-term behavior of the time scale is correct +# in this case, this method introduces an error during the adjustment +# period both in time and in frequency with respect to the official +# definition of UTC. +# +# Questions or comments to: +# Judah Levine +# Time and Frequency Division +# NIST +# Boulder, Colorado +# Judah.Levine@nist.gov +# +# Last Update of leap second values: 8 July 2016 +# +# The following line shows this last update date in NTP timestamp +# format. This is the date on which the most recent change to +# the leap second data was added to the file. This line can +# be identified by the unique pair of characters in the first two +# columns as shown below. 
+# +#$ 3676924800 +# +# The NTP timestamps are in units of seconds since the NTP epoch, +# which is 1 January 1900, 00:00:00. The Modified Julian Day number +# corresponding to the NTP time stamp, X, can be computed as +# +# X/86400 + 15020 +# +# where the first term converts seconds to days and the second +# term adds the MJD corresponding to the time origin defined above. +# The integer portion of the result is the integer MJD for that +# day, and any remainder is the time of day, expressed as the +# fraction of the day since 0 hours UTC. The conversion from day +# fraction to seconds or to hours, minutes, and seconds may involve +# rounding or truncation, depending on the method used in the +# computation. +# +# The data in this file will be updated periodically as new leap +# seconds are announced. In addition to being entered on the line +# above, the update time (in NTP format) will be added to the basic +# file name leap-seconds to form the name leap-seconds.<NTP TIME>. +# In addition, the generic name leap-seconds.list will always point to +# the most recent version of the file. +# +# This update procedure will be performed only when a new leap second +# is announced. +# +# The following entry specifies the expiration date of the data +# in this file in units of seconds since the origin at the instant +# 1 January 1900, 00:00:00. This expiration date will be changed +# at least twice per year whether or not a new leap second is +# announced. These semi-annual changes will be made no later +# than 1 June and 1 December of each year to indicate what +# action (if any) is to be taken on 30 June and 31 December, +# respectively. (These are the customary effective dates for new +# leap seconds.) This expiration date will be identified by a +# unique pair of characters in columns 1 and 2 as shown below. 
+# In the unlikely event that a leap second is announced with an +# effective date other than 30 June or 31 December, then this +# file will be edited to include that leap second as soon as it is +# announced or at least one month before the effective date +# (whichever is later). +# If an announcement by the IERS specifies that no leap second is +# scheduled, then only the expiration date of the file will +# be advanced to show that the information in the file is still +# current -- the update time stamp, the data and the name of the file +# will not change. +# +# Updated through IERS Bulletin C65 +# File expires on: 28 December 2023 +# +#@ 3912710400 +# +2272060800 10 # 1 Jan 1972 +2287785600 11 # 1 Jul 1972 +2303683200 12 # 1 Jan 1973 +2335219200 13 # 1 Jan 1974 +2366755200 14 # 1 Jan 1975 +2398291200 15 # 1 Jan 1976 +2429913600 16 # 1 Jan 1977 +2461449600 17 # 1 Jan 1978 +2492985600 18 # 1 Jan 1979 +2524521600 19 # 1 Jan 1980 +2571782400 20 # 1 Jul 1981 +2603318400 21 # 1 Jul 1982 +2634854400 22 # 1 Jul 1983 +2698012800 23 # 1 Jul 1985 +2776982400 24 # 1 Jan 1988 +2840140800 25 # 1 Jan 1990 +2871676800 26 # 1 Jan 1991 +2918937600 27 # 1 Jul 1992 +2950473600 28 # 1 Jul 1993 +2982009600 29 # 1 Jul 1994 +3029443200 30 # 1 Jan 1996 +3076704000 31 # 1 Jul 1997 +3124137600 32 # 1 Jan 1999 +3345062400 33 # 1 Jan 2006 +3439756800 34 # 1 Jan 2009 +3550089600 35 # 1 Jul 2012 +3644697600 36 # 1 Jul 2015 +3692217600 37 # 1 Jan 2017 +# +# the following special comment contains the +# hash value of the data in this file computed +# use the secure hash algorithm as specified +# by FIPS 180-1. See the files in ~/pub/sha for +# the details of how this hash value is +# computed. Note that the hash computation +# ignores comments and whitespace characters +# in data lines. It includes the NTP values +# of both the last modification time and the +# expiration time of the file, but not the +# white space on those lines. +# the hash line is also ignored in the +# computation. 
+# +#h e76a99dc 65f15cc7 e613e040 f5078b5e b23834fe diff --git a/share/zoneinfo/datfiles/leapseconds b/share/zoneinfo/datfiles/leapseconds deleted file mode 100644 index 168720c16b8..00000000000 --- a/share/zoneinfo/datfiles/leapseconds +++ /dev/null @@ -1,83 +0,0 @@ -# $OpenBSD: leapseconds,v 1.48 2023/03/23 16:12:11 millert Exp $ -# Allowance for leap seconds added to each time zone file. - -# This file is in the public domain. - -# This file is generated automatically from the data in the public-domain -# NIST format leap-seconds.list file, which can be copied from -# <ftp://ftp.nist.gov/pub/time/leap-seconds.list> -# or <ftp://ftp.boulder.nist.gov/pub/time/leap-seconds.list>. -# The NIST file is used instead of its IERS upstream counterpart -# <https://hpiers.obspm.fr/iers/bul/bulc/ntp/leap-seconds.list> -# because under US law the NIST file is public domain -# whereas the IERS file's copyright and license status is unclear. -# For more about leap-seconds.list, please see -# The NTP Timescale and Leap Seconds -# <https://www.eecis.udel.edu/~mills/leap.html>. - -# The rules for leap seconds are specified in Annex 1 (Time scales) of: -# Standard-frequency and time-signal emissions. -# International Telecommunication Union - Radiocommunication Sector -# (ITU-R) Recommendation TF.460-6 (02/2002) -# <https://www.itu.int/rec/R-REC-TF.460-6-200202-I/>. -# The International Earth Rotation and Reference Systems Service (IERS) -# periodically uses leap seconds to keep UTC to within 0.9 s of UT1 -# (a proxy for Earth's angle in space as measured by astronomers) -# and publishes leap second data in a copyrighted file -# <https://hpiers.obspm.fr/iers/bul/bulc/Leap_Second.dat>. -# See: Levine J. Coordinated Universal Time and the leap second. -# URSI Radio Sci Bull. 2016;89(4):30-6. doi:10.23919/URSIRSB.2016.7909995 -# <https://ieeexplore.ieee.org/document/7909995>. 
- -# There were no leap seconds before 1972, as no official mechanism -# accounted for the discrepancy between atomic time (TAI) and the earth's -# rotation. The first ("1 Jan 1972") data line in leap-seconds.list -# does not denote a leap second; it denotes the start of the current definition -# of UTC. - -# All leap-seconds are Stationary (S) at the given UTC time. -# The correction (+ or -) is made at the given time, so in the unlikely -# event of a negative leap second, a line would look like this: -# Leap YEAR MON DAY 23:59:59 - S -# Typical lines look like this: -# Leap YEAR MON DAY 23:59:60 + S -Leap 1972 Jun 30 23:59:60 + S -Leap 1972 Dec 31 23:59:60 + S -Leap 1973 Dec 31 23:59:60 + S -Leap 1974 Dec 31 23:59:60 + S -Leap 1975 Dec 31 23:59:60 + S -Leap 1976 Dec 31 23:59:60 + S -Leap 1977 Dec 31 23:59:60 + S -Leap 1978 Dec 31 23:59:60 + S -Leap 1979 Dec 31 23:59:60 + S -Leap 1981 Jun 30 23:59:60 + S -Leap 1982 Jun 30 23:59:60 + S -Leap 1983 Jun 30 23:59:60 + S -Leap 1985 Jun 30 23:59:60 + S -Leap 1987 Dec 31 23:59:60 + S -Leap 1989 Dec 31 23:59:60 + S -Leap 1990 Dec 31 23:59:60 + S -Leap 1992 Jun 30 23:59:60 + S -Leap 1993 Jun 30 23:59:60 + S -Leap 1994 Jun 30 23:59:60 + S -Leap 1995 Dec 31 23:59:60 + S -Leap 1997 Jun 30 23:59:60 + S -Leap 1998 Dec 31 23:59:60 + S -Leap 2005 Dec 31 23:59:60 + S -Leap 2008 Dec 31 23:59:60 + S -Leap 2012 Jun 30 23:59:60 + S -Leap 2015 Jun 30 23:59:60 + S -Leap 2016 Dec 31 23:59:60 + S - -# UTC timestamp when this leap second list expires. -# Any additional leap seconds will come after this. -# This Expires line is commented out for now, -# so that pre-2020a zic implementations do not reject this file. 
-#Expires 2023 Dec 28 00:00:00 - -# POSIX timestamps for the data in this file: -#updated 1467936000 (2016-07-08 00:00:00 UTC) -#expires 1703721600 (2023-12-28 00:00:00 UTC) - -# Updated through IERS Bulletin C65 -# File expires on: 28 December 2023 diff --git a/share/zoneinfo/leapseconds.awk b/share/zoneinfo/leapseconds.awk new file mode 100755 index 00000000000..7d2556bf1ca --- /dev/null +++ b/share/zoneinfo/leapseconds.awk @@ -0,0 +1,252 @@ +# Generate zic format 'leapseconds' from NIST format 'leap-seconds.list'. + +# This file is in the public domain. + +# This program uses awk arithmetic. POSIX requires awk to support +# exact integer arithmetic only through 10**10, which means for NTP +# timestamps this program works only to the year 2216, which is the +# year 1900 plus 10**10 seconds. However, in practice +# POSIX-conforming awk implementations invariably use IEEE-754 double +# and so support exact integers through 2**53. By the year 2216, +# POSIX will almost surely require at least 2**53 for awk, so for NTP +# timestamps this program should be good until the year 285,428,681 +# (the year 1900 plus 2**53 seconds). By then leap seconds will be +# long obsolete, as the Earth will likely slow down so much that +# there will be more than 25 hours per day and so some other scheme +# will be needed. + +BEGIN { + print "# Allowance for leap seconds added to each time zone file." + print "" + print "# This file is in the public domain." + print "" + print "# This file is generated automatically from the data in the public-domain" + print "# NIST format leap-seconds.list file, which can be copied from" + print "# <ftp://ftp.nist.gov/pub/time/leap-seconds.list>" + print "# or <ftp://ftp.boulder.nist.gov/pub/time/leap-seconds.list>." 
+ print "# The NIST file is used instead of its IERS upstream counterpart" + print "# <https://hpiers.obspm.fr/iers/bul/bulc/ntp/leap-seconds.list>" + print "# because under US law the NIST file is public domain" + print "# whereas the IERS file's copyright and license status is unclear." + print "# For more about leap-seconds.list, please see" + print "# The NTP Timescale and Leap Seconds" + print "# <https://www.eecis.udel.edu/~mills/leap.html>." + print "" + print "# The rules for leap seconds are specified in Annex 1 (Time scales) of:" + print "# Standard-frequency and time-signal emissions." + print "# International Telecommunication Union - Radiocommunication Sector" + print "# (ITU-R) Recommendation TF.460-6 (02/2002)" + print "# <https://www.itu.int/rec/R-REC-TF.460-6-200202-I/>." + print "# The International Earth Rotation and Reference Systems Service (IERS)" + print "# periodically uses leap seconds to keep UTC to within 0.9 s of UT1" + print "# (a proxy for Earth's angle in space as measured by astronomers)" + print "# and publishes leap second data in a copyrighted file" + print "# <https://hpiers.obspm.fr/iers/bul/bulc/Leap_Second.dat>." + print "# See: Levine J. Coordinated Universal Time and the leap second." + print "# URSI Radio Sci Bull. 2016;89(4):30-6. doi:10.23919/URSIRSB.2016.7909995" + print "# <https://ieeexplore.ieee.org/document/7909995>." + print "" + print "# There were no leap seconds before 1972, as no official mechanism" + print "# accounted for the discrepancy between atomic time (TAI) and the earth's" + print "# rotation. The first (\"1 Jan 1972\") data line in leap-seconds.list" + print "# does not denote a leap second; it denotes the start of the current definition" + print "# of UTC." + print "" + print "# All leap-seconds are Stationary (S) at the given UTC time." 
+ print "# The correction (+ or -) is made at the given time, so in the unlikely" + print "# event of a negative leap second, a line would look like this:" + print "# Leap YEAR MON DAY 23:59:59 - S" + print "# Typical lines look like this:" + print "# Leap YEAR MON DAY 23:59:60 + S" + + monthabbr[ 1] = "Jan" + monthabbr[ 2] = "Feb" + monthabbr[ 3] = "Mar" + monthabbr[ 4] = "Apr" + monthabbr[ 5] = "May" + monthabbr[ 6] = "Jun" + monthabbr[ 7] = "Jul" + monthabbr[ 8] = "Aug" + monthabbr[ 9] = "Sep" + monthabbr[10] = "Oct" + monthabbr[11] = "Nov" + monthabbr[12] = "Dec" + + sstamp_init() +} + +# In case the input has CRLF form a la NIST. +{ sub(/\r$/, "") } + +/^#[ \t]*[Uu]pdated through/ || /^#[ \t]*[Ff]ile expires on/ { + last_lines = last_lines $0 "\n" +} + +/^#[$][ \t]/ { updated = $2 } +/^#[@][ \t]/ { expires = $2 } + +/^[ \t]*#/ { next } + +{ + NTP_timestamp = $1 + TAI_minus_UTC = $2 + if (old_TAI_minus_UTC) { + if (old_TAI_minus_UTC < TAI_minus_UTC) { + sign = "23:59:60\t+" + } else { + sign = "23:59:59\t-" + } + sstamp_to_ymdhMs(NTP_timestamp - 1, ss_NTP) + printf "Leap\t%d\t%s\t%d\t%s\tS\n", \ + ss_year, monthabbr[ss_month], ss_mday, sign + } + old_TAI_minus_UTC = TAI_minus_UTC +} + +END { + print "" + + if (expires) { + sstamp_to_ymdhMs(expires, ss_NTP) + + print "# UTC timestamp when this leap second list expires." + print "# Any additional leap seconds will come after this." + if (! EXPIRES_LINE) { + print "# This Expires line is commented out for now," + print "# so that pre-2020a zic implementations do not reject this file." + } + printf "%sExpires %.4d\t%s\t%.2d\t%.2d:%.2d:%.2d\n", \ + EXPIRES_LINE ? "" : "#", \ + ss_year, monthabbr[ss_month], ss_mday, ss_hour, ss_min, ss_sec + } else { + print "# (No Expires line, since the expires time is unknown.)" + } + + # The difference between the NTP and POSIX epochs is 70 years + # (including 17 leap days), each 24 hours of 60 minutes of 60 + # seconds each. 
+ epoch_minus_NTP = ((1970 - 1900) * 365 + 17) * 24 * 60 * 60 + + print "" + print "# POSIX timestamps for the data in this file:" + if (updated) { + sstamp_to_ymdhMs(updated, ss_NTP) + printf "#updated %d (%.4d-%.2d-%.2d %.2d:%.2d:%.2d UTC)\n", \ + updated - epoch_minus_NTP, \ + ss_year, ss_month, ss_mday, ss_hour, ss_min, ss_sec + } else { + print "#(updated time unknown)" + } + if (expires) { + sstamp_to_ymdhMs(expires, ss_NTP) + printf "#expires %d (%.4d-%.2d-%.2d %.2d:%.2d:%.2d UTC)\n", \ + expires - epoch_minus_NTP, \ + ss_year, ss_month, ss_mday, ss_hour, ss_min, ss_sec + } else { + print "#(expires time unknown)" + } + printf "\n%s", last_lines +} + +# sstamp_to_ymdhMs - convert seconds timestamp to date and time +# +# Call as: +# +# sstamp_to_ymdhMs(sstamp, epoch_days) +# +# where: +# +# sstamp - is the seconds timestamp. +# epoch_days - is the timestamp epoch in Gregorian days since 1600-03-01. +# ss_NTP is appropriate for an NTP sstamp. +# +# Both arguments should be nonnegative integers. +# On return, the following variables are set based on sstamp: +# +# ss_year - Gregorian calendar year +# ss_month - month of the year (1-January to 12-December) +# ss_mday - day of the month (1-31) +# ss_hour - hour (0-23) +# ss_min - minute (0-59) +# ss_sec - second (0-59) +# ss_wday - day of week (0-Sunday to 6-Saturday) +# +# The function sstamp_init should be called prior to using sstamp_to_ymdhMs. + +function sstamp_init() +{ + # Days in month N, where March is month 0 and January month 10. + ss_mon_days[ 0] = 31 + ss_mon_days[ 1] = 30 + ss_mon_days[ 2] = 31 + ss_mon_days[ 3] = 30 + ss_mon_days[ 4] = 31 + ss_mon_days[ 5] = 31 + ss_mon_days[ 6] = 30 + ss_mon_days[ 7] = 31 + ss_mon_days[ 8] = 30 + ss_mon_days[ 9] = 31 + ss_mon_days[10] = 31 + + # Counts of days in a Gregorian year, quad-year, century, and quad-century. 
+ ss_year_days = 365 + ss_quadyear_days = ss_year_days * 4 + 1 + ss_century_days = ss_quadyear_days * 25 - 1 + ss_quadcentury_days = ss_century_days * 4 + 1 + + # Standard day epochs, suitable for epoch_days. + # ss_MJD = 94493 + # ss_POSIX = 135080 + ss_NTP = 109513 +} + +function sstamp_to_ymdhMs(sstamp, epoch_days, \ + quadcentury, century, quadyear, year, month, day) +{ + ss_hour = int(sstamp / 3600) % 24 + ss_min = int(sstamp / 60) % 60 + ss_sec = sstamp % 60 + + # Start with a count of days since 1600-03-01 Gregorian. + day = epoch_days + int(sstamp / (24 * 60 * 60)) + + # Compute a year-month-day date with days of the month numbered + # 0-30, months (March-February) numbered 0-11, and years that + # start March 1 and end after the last day of February. A quad-year + # starts on March 1 of a year evenly divisible by 4 and ends after + # the last day of February 4 years later. A century starts on and + # ends before March 1 in years evenly divisible by 100. + # A quad-century starts on and ends before March 1 in years divisible + # by 400. While the number of days in a quad-century is a constant, + # the number of days in each other time period can vary by 1. + # Any variation is in the last day of the time period (there might + # or might not be a February 29) where it is easy to deal with. + + quadcentury = int(day / ss_quadcentury_days) + day -= quadcentury * ss_quadcentury_days + ss_wday = (day + 3) % 7 + century = int(day / ss_century_days) + century -= century == 4 + day -= century * ss_century_days + quadyear = int(day / ss_quadyear_days) + day -= quadyear * ss_quadyear_days + year = int(day / ss_year_days) + year -= year == 4 + day -= year * ss_year_days + for (month = 0; month < 11; month++) { + if (day < ss_mon_days[month]) + break + day -= ss_mon_days[month] + } + + # Convert the date to a conventional day of month (1-31), + # month (1-12, January-December) and Gregorian year. 
+ ss_mday = day + 1 + if (month <= 9) { + ss_month = month + 3 + } else { + ss_month = month - 9 + year++ + } + ss_year = 1600 + quadcentury * 400 + century * 100 + quadyear * 4 + year +} diff --git a/share/zoneinfo/version b/share/zoneinfo/version new file mode 100644 index 00000000000..49f35c76f69 --- /dev/null +++ b/share/zoneinfo/version @@ -0,0 +1 @@ +2023cgtz diff --git a/share/zoneinfo/ziguard.awk b/share/zoneinfo/ziguard.awk new file mode 100644 index 00000000000..7a3404fa4fc --- /dev/null +++ b/share/zoneinfo/ziguard.awk @@ -0,0 +1,386 @@ +# Convert tzdata source into vanguard or rearguard form. + +# Contributed by Paul Eggert. This file is in the public domain. + +# This is not a general-purpose converter; it is designed for current tzdata. +# It just converts from current source to main, vanguard, and rearguard forms. +# Although it might be nice for it to be idempotent, or to be useful +# for converting back and forth between vanguard and rearguard formats, +# it does not do these nonessential tasks now. +# +# Although main and vanguard forms are currently equivalent, +# this need not always be the case. When the two forms differ, +# this script can convert either from main to vanguard form (needed then), +# or from vanguard to main form (this conversion would be needed later, +# after main became rearguard and vanguard became main). +# There is no need to convert rearguard to other forms. +# +# When converting to vanguard form, the output can use the line +# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects. +# +# When converting to vanguard form, the output can use negative SAVE +# values. +# +# When converting to rearguard form, the output uses only nonnegative +# SAVE values. The idea is for the output data to simulate the behavior +# of the input data as best it can within the constraints of the +# rearguard format. + +# Given a FIELD like "-0:30", return a minute count like -30. 
+function get_minutes(field, \
+                     sign, hours, minutes)
+{
+  # The sign is tracked separately because "-0:30" converts to 0 hours,
+  # which would otherwise lose the minus sign.
+  sign = field ~ /^-/ ? -1 : 1
+  # Unary plus forces a numeric conversion of the leading hours part.
+  hours = +field
+  if (field ~ /:/) {
+    minutes = field
+    sub(/[^:]*:/, "", minutes)
+  }
+  return 60 * hours + sign * minutes
+}
+
+# Given an OFFSET, which is a minute count like 300 or 330,
+# return a %z-style abbreviation like "+05" or "+0530".
+function offset_abbr(offset, \
+                     hours, minutes)
+{
+  hours = int(offset / 60)
+  minutes = offset % 60
+  if (minutes) {
+    return sprintf("%+.4d", hours * 100 + minutes);
+  } else {
+    return sprintf("%+.2d", hours)
+  }
+}
+
+# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
+# A timestamp without a fractional part is returned unchanged.
+# An exact half second rounds to the even second.
+function round_to_second(timestamp, \
+                         hh, mm, ss, seconds, dot_dddd, subseconds)
+{
+  dot_dddd = timestamp
+  # Reduce dot_dddd to ".dddd"; if there is no fraction, nothing to round.
+  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
+    return timestamp
+  hh = mm = ss = timestamp
+  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
+  sub(/^[-+]?[0-9]+:/, "", mm)
+  sub(/^[-+]?/, "", hh)
+  # Numeric conversion of hh, mm and ss uses only each string's leading
+  # number, so seconds is the unsigned whole-second magnitude.
+  seconds = 3600 * hh + 60 * mm + ss
+  subseconds = +dot_dddd
+  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
+  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
+                 seconds / 3600, seconds / 60 % 60, seconds % 60)
+}
+
+BEGIN {
+  dataform_type["vanguard"] = 1
+  dataform_type["main"] = 1
+  dataform_type["rearguard"] = 1
+
+  if (PACKRATLIST) {
+    # Compare against 0: getline returns -1 on a read error, which a bare
+    # truth test would treat as success and loop on forever.
+    while ((getline <PACKRATLIST) > 0) {
+      if ($0 ~ /^#/) continue
+      packratlist[$3] = 1
+    }
+  }
+
+  # The command line should set DATAFORM.
+  if (!dataform_type[DATAFORM]) exit 1
+}
+
+# Activate a line tagged "#PACKRATLIST <name>" for the list in use by
+# stripping the tag and the name.
+$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
+  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
+}
+
+# Remember the current zone so continuation lines can be attributed to it.
+/^Zone/ { zone = $2 }
+
+DATAFORM != "main" {
+  in_comment = $0 ~ /^#/
+  uncomment = comment_out = 0
+
+  # If this line should differ due to Czechoslovakia using negative SAVE values,
+  # uncomment the desired version and comment out the undesired one.
+  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
+      && $0 ~ /1947 Feb 23/) {
+    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
+      uncomment = in_comment
+    } else {
+      comment_out = !in_comment
+    }
+  }
+
+  # If this line should differ due to Ireland using negative SAVE values,
+  # uncomment the desired version and comment out the undesired one.
+  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
+  Zone_Dublin_post_1968 \
+    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
+       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
+  if (Rule_Eire || Zone_Dublin_post_1968) {
+    if ((Rule_Eire \
+         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
+        == (DATAFORM != "rearguard")) {
+      uncomment = in_comment
+    } else {
+      comment_out = !in_comment
+    }
+  }
+
+  # If this line should differ due to Namibia using negative SAVE values,
+  # uncomment the desired version and comment out the undesired one.
+  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
+  Zone_using_Namibia_rule \
+    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
+       && ($(in_comment + 2) == "Namibia" \
+           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
+               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
+                   || in_comment + 3 == NF))))
+  if (Rule_Namibia || Zone_using_Namibia_rule) {
+    if ((Rule_Namibia \
+         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
+         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
+        == (DATAFORM != "rearguard")) {
+      uncomment = in_comment
+    } else {
+      comment_out = !in_comment
+    }
+  }
+
+  # If this line should differ due to Portugal benefiting from %z if supported,
+  # uncomment the desired version and comment out the undesired one.
+  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
+    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
+      uncomment = in_comment
+    } else {
+      comment_out = !in_comment
+    }
+  }
+
+  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
+  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
+  # This works around a bug in TZUpdater 2.3.2.
+  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
+    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
+      uncomment = in_comment
+    } else {
+      comment_out = !in_comment
+    }
+  }
+
+  if (uncomment) {
+    sub(/^#/, "")
+  }
+  if (comment_out) {
+    sub(/^/, "#")
+  }
+
+  # Prefer %z in vanguard form, explicit abbreviations otherwise.
+  if (DATAFORM == "vanguard") {
+    # Tag a FORMAT field that starts with a sign, then replace the tagged
+    # numeric abbreviation with %z, leaving "-00" untouched.
+    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
+        "&CHANGE-TO-%z")
+    sub(/-00CHANGE-TO-%z/, "-00")
+    sub(/[-+][^\t ]+CHANGE-TO-/, "")
+  } else {
+    if ($0 ~ /^[^#]*%z/) {
+      # STDOFF is field 3 on a Zone line and field 1 on a continuation line.
+      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
+      rules_column = stdoff_column + 1
+      stdoff = get_minutes($stdoff_column)
+      rules = $rules_column
+      stdabbr = offset_abbr(stdoff)
+      if (rules == "-") {
+        abbr = stdabbr
+      } else {
+        dstabbr_only = rules ~ /^[+0-9-]/
+        if (dstabbr_only) {
+          dstoff = get_minutes(rules)
+        } else {
+          # The DST offset is normally an hour, but there are special cases.
+          if (rules == "Morocco" && NF == 3) {
+            dstoff = -60
+          } else if (rules == "NBorneo") {
+            dstoff = 20
+          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
+                     || (rules == "Uruguay" \
+                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
+            dstoff = 30
+          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
+            dstoff = 90
+          } else {
+            dstoff = 60
+          }
+        }
+        dstabbr = offset_abbr(stdoff + dstoff)
+        if (dstabbr_only) {
+          abbr = dstabbr
+        } else {
+          abbr = stdabbr "/" dstabbr
+        }
+      }
+      sub(/%z/, abbr)
+    }
+  }
+
+  # Normally, prefer whole seconds.  However, prefer subseconds
+  # if generating vanguard form and the otherwise-undocumented
+  # VANGUARD_SUBSECONDS environment variable is set.
+  # This relies on #STDOFF comment lines in the data.
+  # It is for hypothetical clients that support UT offsets that are
+  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
+  # No known clients need this currently, and this experimental
+  # feature may be changed or withdrawn in future releases.
+  if ($1 == "#STDOFF") {
+    stdoff = $2
+    rounded_stdoff = round_to_second(stdoff)
+    # stdoff_subst[0] is the text to look for, stdoff_subst[1] its replacement.
+    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
+      stdoff_subst[0] = rounded_stdoff
+      stdoff_subst[1] = stdoff
+    } else {
+      stdoff_subst[0] = stdoff
+      stdoff_subst[1] = rounded_stdoff
+    }
+  } else if (stdoff_subst[0]) {
+    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
+    stdoff_column_val = $stdoff_column
+    if (stdoff_column_val == stdoff_subst[0]) {
+      sub(stdoff_subst[0], stdoff_subst[1])
+    } else if (stdoff_column_val != stdoff_subst[1]) {
+      # A line with a different offset ends the scope of the last #STDOFF.
+      stdoff_subst[0] = 0
+    }
+  }
+
+  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
+  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
+  if ($0 ~ /^Rule/ && $2 == "Japan") {
+    if (DATAFORM == "rearguard") {
+      if ($7 == "Sat>=8" && $8 == "25:00") {
+        sub(/Sat>=8/, "Sun>=9")
+        sub(/25:00/, " 1:00")
+      }
+    } else {
+      if ($7 == "Sun>=9" && $8 == "1:00") {
+        sub(/Sun>=9/, "Sat>=8")
+        sub(/ 1:00/, "25:00")
+      }
+    }
+  }
+
+  # In rearguard form, change the Morocco lines with negative SAVE values
+  # to use positive SAVE values.
+  if ($2 == "Morocco") {
+    if ($0 ~ /^Rule/) {
+      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
+        if (DATAFORM == "rearguard") {
+          sub(/\t2018\t/, "\t2017\t")
+        } else {
+          sub(/\t2017\t/, "\t2018\t")
+        }
+      }
+
+      if (2019 <= $3) {
+        if ($8 == "2:00") {
+          if (DATAFORM == "rearguard") {
+            sub(/\t0\t/, "\t1:00\t")
+          } else {
+            sub(/\t1:00\t/, "\t0\t")
+          }
+        } else {
+          if (DATAFORM == "rearguard") {
+            sub(/\t-1:00\t/, "\t0\t")
+          } else {
+            sub(/\t0\t/, "\t-1:00\t")
+          }
+        }
+      }
+    }
+    if ($1 ~ /^[+0-9-]/ && NF == 3) {
+      if (DATAFORM == "rearguard") {
+        sub(/1:00\tMorocco/, "0:00\tMorocco")
+        sub(/\t\+01\/\+00$/, "\t+00/+01")
+      } else {
+        sub(/0:00\tMorocco/, "1:00\tMorocco")
+        # Both plus signs are escaped so they match literally; an
+        # unescaped "+" after the slash would quantify the slash instead.
+        sub(/\t\+00\/\+01$/, "\t+01/+00")
+      }
+    }
+  }
+}
+
+/^Zone/ {
+  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
+}
+{
+  # Comment out every non-Rule line of a zone not selected by PACKRATLIST.
+  if (packrat_ignored && $0 !~ /^Rule/) {
+    sub(/^/, "#")
+  }
+}
+
+# Return a link line resulting by changing OLDLINE to link to TARGET
+# from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
+# Align data columns the same as they were in OLDLINE.
+# Also, replace any existing white space followed by comment with COMMENT.
+function make_linkline(oldline, target, linkname, oldtarget, comment, \
+                       oldprefix, oldprefixlen, oldtargettabs, \
+                       replsuffix, targettabs)
+{
+  oldprefix = "Link\t" oldtarget "\t"
+  oldprefixlen = length(oldprefix)
+  if (substr(oldline, 1, oldprefixlen) == oldprefix) {
+    # Use tab stops to preserve LINKNAME's column.
+    replsuffix = substr(oldline, oldprefixlen + 1)
+    sub(/[\t ]*#.*/, "", replsuffix)
+    oldtargettabs = int(length(oldtarget) / 8) + 1
+    targettabs = int(length(target) / 8) + 1
+    for (; targettabs < oldtargettabs; targettabs++) {
+      replsuffix = "\t" replsuffix
+    }
+    for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
+      replsuffix = substr(replsuffix, 2)
+    }
+  } else {
+    # Odd format line; don't bother lining up its replacement nicely.
+    replsuffix = linkname
+  }
+  return "Link\t" target "\t" replsuffix comment
+}
+
+# In vanguard form, a "Link T LINKNAME #= TARGET" line is rewritten to
+# link directly to TARGET (field 5), dropping the trailing comment.
+/^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
+  $0 = make_linkline($0, $5, $3, $2)
+}
+
+# If a Link line is followed by a Link or Zone line for the same data, comment
+# out the Link line.  This can happen if backzone overrides a Link
+# with a Zone or a different Link.
+/^Zone/ {
+  sub(/^Link/, "#Link", line[linkline[$2]])
+}
+/^Link/ {
+  sub(/^Link/, "#Link", line[linkline[$3]])
+  linkline[$3] = NR
+  linktarget[$3] = $2
+}
+
+# Buffer every (possibly rewritten) line; output happens in END.
+{ line[NR] = $0 }
+
+# Rewrite each buffered Link line whose target is itself a link name so
+# that it points at the end of the chain.  U is a local, like the rest.
+function cut_link_chains_short( \
+                               l, linkname, t, target, u)
+{
+  for (linkname in linktarget) {
+    target = linktarget[linkname]
+    t = linktarget[target]
+    if (t) {
+      # TARGET is itself a link name.  Replace the line "Link TARGET LINKNAME"
+      # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
+      # of links that LINKNAME points to.
+      while ((u = linktarget[t])) {
+        t = u
+      }
+      l = linkline[linkname]
+      line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
+    }
+  }
+}
+
+END {
+  if (DATAFORM != "vanguard") {
+    cut_link_chains_short()
+  }
+  for (i = 1; i <= NR; i++)
+    print line[i]
+}
diff --git a/share/zoneinfo/zishrink.awk b/share/zoneinfo/zishrink.awk
new file mode 100644
index 00000000000..66968e8648e
--- /dev/null
+++ b/share/zoneinfo/zishrink.awk
@@ -0,0 +1,356 @@
+# Convert tzdata source into a smaller version of itself.
+
+# Contributed by Paul Eggert.  This file is in the public domain.
+
+# This is not a general-purpose converter; it is designed for current tzdata.
+# 'zic' should treat this script's output as if it were identical to
+# this script's input.
+
+# Record a hash N for the new name NAME, checking for collisions.
+# On a collision, print a diagnostic comment and exit with status 1.
+
+function record_hash(n, name)
+{
+  if (used_hashes[n]) {
+    printf "# ! collision: %s %s\n", used_hashes[n], name
+    exit 1
+  }
+  used_hashes[n] = name
+}
+
+# Return a shortened rule name representing NAME,
+# and record this relationship to the hash table.
+
+function gen_rule_name(name, \
+                       n)
+{
+  # Use a simple mnemonic: the first two letters.
+  n = substr(name, 1, 2)
+  # record_hash exits the script if this abbreviation is already taken.
+  record_hash(n, name)
+  # printf "# %s = %s\n", n, name
+  return n
+}
+
+# Fill the global rule[] table with fixed abbreviations for known rule
+# names, and register each abbreviation with record_hash.
+function prehash_rule_names( \
+                            name)
+{
+  # Rule names are not part of the tzdb API, so substitute shorter
+  # ones.  Shortening them consistently from one release to the next
+  # simplifies comparison of the output.  That being said, the
+  # 1-letter names below are not standardized in any way, and can
+  # change arbitrarily from one release to the next, as the main goal
+  # here is compression not comparison.
+
+  # Abbreviating these rules names to one letter saved the most space
+  # circa 2018e.
+  rule["Arg"] = "A"
+  rule["Brazil"] = "B"
+  rule["Canada"] = "C"
+  rule["Denmark"] = "D"
+  rule["EU"] = "E"
+  rule["France"] = "F"
+  rule["GB-Eire"] = "G"
+  rule["Halifax"] = "H"
+  rule["Italy"] = "I"
+  rule["Jordan"] = "J"
+  rule["Egypt"] = "K" # "Kemet" in ancient Egyptian
+  rule["Libya"] = "L"
+  rule["Morocco"] = "M"
+  rule["Neth"] = "N"
+  rule["Poland"] = "O" # arbitrary
+  rule["Palestine"] = "P"
+  rule["Cuba"] = "Q" # Its start sounds like "Q".
+  rule["Russia"] = "R"
+  rule["Syria"] = "S"
+  rule["Turkey"] = "T"
+  rule["Uruguay"] = "U"
+  rule["Vincennes"] = "V"
+  rule["Winn"] = "W"
+  rule["Mongol"] = "X" # arbitrary
+  rule["NT_YK"] = "Y"
+  rule["Zion"] = "Z"
+  rule["Austria"] = "a"
+  rule["Belgium"] = "b"
+  rule["C-Eur"] = "c"
+  rule["Algeria"] = "d" # country code DZ
+  rule["E-Eur"] = "e"
+  rule["Taiwan"] = "f" # Formosa
+  rule["Greece"] = "g"
+  rule["Hungary"] = "h"
+  rule["Iran"] = "i"
+  rule["StJohns"] = "j"
+  rule["Chatham"] = "k" # arbitrary
+  rule["Lebanon"] = "l"
+  rule["Mexico"] = "m"
+  rule["Tunisia"] = "n" # country code TN
+  rule["Moncton"] = "o" # arbitrary
+  rule["Port"] = "p"
+  rule["Albania"] = "q" # arbitrary
+  rule["Regina"] = "r"
+  rule["Spain"] = "s"
+  rule["Toronto"] = "t"
+  rule["US"] = "u"
+  rule["Louisville"] = "v" # ville
+  rule["Iceland"] = "w" # arbitrary
+  rule["Chile"] = "x" # arbitrary
+  rule["Para"] = "y" # country code PY
+  rule["Romania"] = "z" # arbitrary
+  rule["Macau"] = "_" # arbitrary
+
+  # Use ISO 3166 alpha-2 country codes for remaining names that are countries.
+  # This is more systematic, and avoids collisions (e.g., Malta and Moldova).
+  rule["Armenia"] = "AM"
+  rule["Aus"] = "AU"
+  rule["Azer"] = "AZ"
+  rule["Barb"] = "BB"
+  rule["Dhaka"] = "BD"
+  rule["Bulg"] = "BG"
+  rule["Bahamas"] = "BS"
+  rule["Belize"] = "BZ"
+  rule["Swiss"] = "CH"
+  rule["Cook"] = "CK"
+  rule["PRC"] = "CN"
+  rule["Cyprus"] = "CY"
+  rule["Czech"] = "CZ"
+  rule["Germany"] = "DE"
+  rule["DR"] = "DO"
+  rule["Ecuador"] = "EC"
+  rule["Finland"] = "FI"
+  rule["Fiji"] = "FJ"
+  rule["Falk"] = "FK"
+  rule["Ghana"] = "GH"
+  rule["Guat"] = "GT"
+  rule["Hond"] = "HN"
+  rule["Haiti"] = "HT"
+  rule["Eire"] = "IE"
+  rule["Iraq"] = "IQ"
+  rule["Japan"] = "JP"
+  rule["Kyrgyz"] = "KG"
+  rule["ROK"] = "KR"
+  rule["Latvia"] = "LV"
+  rule["Lux"] = "LX"
+  rule["Moldova"] = "MD"
+  rule["Malta"] = "MT"
+  rule["Mauritius"] = "MU"
+  rule["Namibia"] = "NA"
+  rule["Nic"] = "NI"
+  rule["Norway"] = "NO"
+  rule["Peru"] = "PE"
+  rule["Phil"] = "PH"
+  rule["Pakistan"] = "PK"
+  rule["Sudan"] = "SD"
+  rule["Salv"] = "SV"
+  rule["Tonga"] = "TO"
+  rule["Vanuatu"] = "VU"
+
+  # Avoid collisions.
+  rule["Detroit"] = "Dt" # De = Denver
+
+  for (name in rule) {
+    record_hash(rule[name], name)
+  }
+}
+
+# Return the first N elements of FIELD joined by single spaces.
+function make_line(n, field, \
+                   f, r)
+{
+  r = field[1]
+  for (f = 2; f <= n; f++)
+    r = r " " field[f]
+  return r
+}
+
+# Process the input line LINE and save it for later output.
+
+function process_input_line(line, \
+                            f, field, end, i, n, r, startdef, \
+                            linkline, ruleline, zoneline)
+{
+  # Remove comments, normalize spaces, and append a space to each line.
+  sub(/#.*/, "", line)
+  line = line " "
+  gsub(/[\t ]+/, " ", line)
+
+  # Abbreviate keywords and determine line type.
+  # Each flag is sub()'s return value: 1 if the keyword was present, else 0.
+  linkline = sub(/^Link /, "L ", line)
+  ruleline = sub(/^Rule /, "R ", line)
+  zoneline = sub(/^Zone /, "Z ", line)
+
+  # Replace FooAsia rules with the same rules without "Asia", as they
+  # are duplicates.
+  if (match(line, /[^ ]Asia /)) {
+    # The Rule definition itself is dropped; a reference has the four
+    # characters "Asia" cut out of the name.
+    if (ruleline) return
+    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
+  }
+
+  # Abbreviate times.
+  # First strip leading zeros that follow a colon or a space, then drop
+  # a remaining ":0" that is not followed by another colon.
+  while (match(line, /[: ]0+[0-9]/))
+    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
+  while (match(line, /:0[^:]/))
+    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)
+
+  # Abbreviate weekday names.
+  # Mon, Wed and Fri are cut to their first letter.
+  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
+    end = RSTART + RLENGTH
+    line = substr(line, 1, end - 4) substr(line, end - 1)
+  }
+  # Sun, Tue, Thu and Sat are cut to their first two letters.
+  while (match(line, / (last)?(Sun|Tue|Thu|Sat)[ <>]/)) {
+    end = RSTART + RLENGTH
+    line = substr(line, 1, end - 3) substr(line, end - 1)
+  }
+
+  # Abbreviate "max", "min", "only" and month names.
+  gsub(/ max /, " ma ", line)
+  gsub(/ min /, " mi ", line)
+  gsub(/ only /, " o ", line)
+  gsub(/ Jan /, " Ja ", line)
+  gsub(/ Feb /, " F ", line)
+  gsub(/ Apr /, " Ap ", line)
+  gsub(/ Aug /, " Au ", line)
+  gsub(/ Sep /, " S ", line)
+  gsub(/ Oct /, " O ", line)
+  gsub(/ Nov /, " N ", line)
+  gsub(/ Dec /, " D ", line)
+
+  # Strip leading and trailing space.
+  sub(/^ /, "", line)
+  sub(/ $/, "", line)
+
+  # Remove unnecessary trailing zero fields.
+  sub(/ 0+$/, "", line)
+
+  # Remove unnecessary trailing days-of-month "1".
+  if (match(line, /[A-Za-z] 1$/))
+    line = substr(line, 1, RSTART)
+
+  # Remove unnecessary trailing " Ja" (for January).
+  sub(/ Ja$/, "", line)
+
+  n = split(line, field)
+
+  # Record which rule names are used, and generate their abbreviations.
+  # The rule name is field 4 on a Zone line and field 2 on a continuation
+  # line; for Link and Rule lines f is 0, and field[0] is never set.
+  f = zoneline ? 4 : linkline || ruleline ? 0 : 2
+  r = field[f]
+  if (r ~ /^[^-+0-9]/) {
+    rule_used[r] = 1
+  }
+
+  # If this zone supersedes an earlier one, delete the earlier one
+  # from the saved output lines.
+  startdef = ""
+  if (zoneline)
+    zonename = startdef = field[2]
+  else if (linkline)
+    zonename = startdef = field[3]
+  else if (ruleline)
+    zonename = ""
+  if (startdef) {
+    i = zonedef[startdef]
+    if (i) {
+      # zonedef holds a 1-based position, so i - 1 indexes output_line.
+      # Blank the saved line, then each following line that starts with
+      # a sign or digit.
+      do
+        output_line[i - 1] = ""
+      while (output_line[i++] ~ /^[-+0-9]/);
+    }
+  }
+  # Stored as nout + 1 so that a zero or unset entry means "not seen".
+  zonedef[zonename] = nout + 1
+
+  # Save the line for later output.
+  output_line[nout++] = make_line(n, field)
+}
+
+# Blank out saved Rule lines whose rule name was never recorded in rule_used.
+function omit_unused_rules( \
+                           i, field)
+{
+  for (i = 0; i < nout; i++) {
+    split(output_line[i], field)
+    if (field[1] == "R" && !rule_used[field[2]]) {
+      output_line[i] = ""
+    }
+  }
+}
+
+# Replace each rule name in the saved lines with its abbreviation from
+# rule[], generating one with gen_rule_name when none exists yet.
+function abbreviate_rule_names( \
+                               abbr, f, field, i, n, r)
+{
+  for (i = 0; i < nout; i++) {
+    n = split(output_line[i], field)
+    if (n) {
+      # Rule-name position: field 4 on "Z" lines, field 2 on "R" and
+      # continuation lines; "L" lines use 0, which is never set.
+      f = field[1] == "Z" ? 4 : field[1] == "L" ? 0 : 2
+      r = field[f]
+      if (r ~ /^[^-+0-9]/) {
+        abbr = rule[r]
+        if (!abbr) {
+          rule[r] = abbr = gen_rule_name(r)
+        }
+        field[f] = abbr
+        output_line[i] = make_line(n, field)
+      }
+    }
+  }
+}
+
+# Print every saved line that has not been blanked out.
+function output_saved_lines( \
+                            i)
+{
+  for (i = 0; i < nout; i++)
+    if (output_line[i])
+      print output_line[i]
+}
+
+BEGIN {
+  # Files that the output normally depends on.
+  default_dep["africa"] = 1
+  default_dep["antarctica"] = 1
+  default_dep["asia"] = 1
+  default_dep["australasia"] = 1
+  default_dep["backward"] = 1
+  default_dep["etcetera"] = 1
+  default_dep["europe"] = 1
+  default_dep["factory"] = 1
+  default_dep["northamerica"] = 1
+  default_dep["southamerica"] = 1
+  default_dep["ziguard.awk"] = 1
+  default_dep["zishrink.awk"] = 1
+
+  # Output a version string from 'version' and related configuration variables
+  # supported by tzdb's Makefile.  If you change the makefile or any other files
+  # that affect the output of this script, you should append '-SOMETHING'
+  # to the contents of 'version', where SOMETHING identifies what was changed.
+
+  # deps, version, dataform and redo are not assigned in this script;
+  # presumably they are set on the command line -- confirm against the Makefile.
+  ndeps = split(deps, dep)
+  ddeps = ""
+  for (i = 1; i <= ndeps; i++) {
+    if (default_dep[dep[i]]) {
+      # Mark a default dependency as present by raising its count above 1.
+      default_dep[dep[i]]++
+    } else {
+      ddeps = ddeps " " dep[i]
+    }
+  }
+  for (d in default_dep) {
+    # A count still at 1 means a default dependency is absent from deps.
+    if (default_dep[d] == 1) {
+      ddeps = ddeps " !" d
+    }
+  }
+  print "# version", version
+  if (dataform != "main") {
+    print "# dataform", dataform
+  }
+  if (redo != "posix_right") {
+    print "# redo " redo
+  }
+  if (ddeps) {
+    print "# ddeps" ddeps
+  }
+  print "# This zic input file is in the public domain."
+
+  prehash_rule_names()
+}
+
+# Process every line whose first non-blank character is not "#".
+/^[\t ]*[^#\t ]/ {
+  process_input_line($0)
+}
+
+END {
+  omit_unused_rules()
+  abbreviate_rule_names()
+  output_saved_lines()
+} |