7.2. CSV COVID-19

7.2.1. Code

#%% Imports
from datetime import date
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
from pandas.tseries.holiday import EasterMonday, Easter
from pandas.tseries.offsets import Day
import matplotlib.pyplot as plt
import matplotlib.axes


#%% Settings
# Console display limits for pandas output: line width, number of columns
# and number of rows shown when a frame is printed.
pd.set_option('display.width', 200)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 100)


#%% Data Sources
# URLs in the CSSEGISandData/COVID-19 GitHub repository, kept for reference
# but disabled in favour of the copies below.
#CONFIRMED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
#DEATHS = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
#RECOVERED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

# CSV files actually loaded by this script.
# NOTE(review): presumably frozen snapshots of the files above - confirm.
CONFIRMED = 'https://python.astrotech.io/_static/covid19-confirmed.csv'
DEATHS = 'https://python.astrotech.io/_static/covid19-deaths.csv'
RECOVERED = 'https://python.astrotech.io/_static/covid19-recovered.csv'


#%% Data Frames
# Each CSV is read straight from its URL when this cell/module is executed,
# so running the file requires network access.
confirmed = pd.read_csv(CONFIRMED)
deaths = pd.read_csv(DEATHS)
recovered = pd.read_csv(RECOVERED)


#%% Get Country from DataFrame
def covid19(country: str = None) -> pd.DataFrame:
    """
    Build a per-date table of Confirmed, Deaths and Recovered totals.

    The numbers are taken from the module-level ``confirmed``, ``deaths``
    and ``recovered`` frames.  When ``country`` is given, only rows whose
    ``Country/Region`` equals it are summed; with ``None`` all rows are
    summed.

    >>> covid19('Poland').loc['2022-01-01']
    Confirmed    4120248
    Deaths         97559
    Recovered          0
    Name: 2022-01-01 00:00:00, dtype: int64

    >>> covid19('France').loc['2022-01-01']
    Confirmed    10296909
    Deaths         124839
    Recovered           0
    Name: 2022-01-01 00:00:00, dtype: int64

    >>> covid19().loc['2022-01-01']
    Confirmed    289928638
    Deaths         5440827
    Recovered            0
    Name: 2022-01-01 00:00:00, dtype: int64
    """
    def _totals(frame: pd.DataFrame) -> pd.Series:
        """Sum the selected rows of ``frame`` for every date column."""
        if country is not None:
            frame = frame[frame['Country/Region'] == country]
        # After transposing, the original columns become rows; the first
        # four are skipped (presumably the non-date metadata columns).
        per_date = frame.transpose().iloc[4:]
        summed = per_date.sum(axis='columns').astype('int64')
        # Turn the remaining column labels into timestamps.
        return summed.rename(pd.to_datetime, axis='index')

    sources = {
        'Confirmed': confirmed,
        'Deaths': deaths,
        'Recovered': recovered,
    }
    return pd.DataFrame({
        label: _totals(frame)
        for label, frame in sources.items()})


#%% Calendars
class PLHolidayCalendar(AbstractHolidayCalendar):
    """
    Holiday rules for Poland, usable wherever pandas expects a calendar.

    The list of holidays follows
    https://en.wikipedia.org/wiki/Public_holidays_in_Poland
    """
    rules = [
        Holiday(name='New Years Day', month=1, day=1),
        Holiday(name='Epiphany', month=1, day=6),
        # Movable feasts start from 1 January and are shifted by the
        # Easter offset (plus a fixed number of days where needed).
        Holiday(name='Easter', month=1, day=1, offset=[Easter()]),
        EasterMonday,
        Holiday(name='May Day', month=5, day=1),
        Holiday(name='Constitution Day', month=5, day=3),
        Holiday(name='Pentecost Sunday', month=1, day=1,
                offset=[Easter(), Day(49)]),
        Holiday(name='Corpus Christi', month=1, day=1,
                offset=[Easter(), Day(60)]),
        Holiday(name='Assumption of the Blessed Virgin Mary',
                month=8, day=15),
        Holiday(name='All Saints Day', month=11, day=1),
        Holiday(name='Independence Day', month=11, day=11),
        Holiday(name='Christmas Day', month=12, day=25),
        Holiday(name='Second Day of Christmastide', month=12, day=26),
    ]


#%% Show trendline
def plot_trendline(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """
    Draw the ``Confirmed`` and ``Deaths`` columns as two stacked line plots.

    NOTE(review): with ``subplots=True`` pandas documents the result as an
    array of Axes, so the ``Axes`` return annotation looks too narrow -
    confirm before relying on it.
    """
    columns = ['Confirmed', 'Deaths']
    selected = data.loc[:, columns]
    return selected.plot(
        kind='line',
        subplots=True,
        layout=(2, 1),
        figsize=(10, 10))


#%% Show fatalities
def plot_fatalities(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """
    Plot ``Deaths`` as a percentage of ``Confirmed`` for every row of ``data``.

    NOTE(review): the plot title mentions "new cases in last two weeks",
    but nothing here slices or differences the data - presumably the caller
    is expected to pass the relevant period; confirm.
    """
    ratio = data['Deaths'] / data['Confirmed']
    percent = ratio.mul(100).round(2)  # fraction -> percent, two decimals
    options = dict(
        kind='line',
        title='Percent of deaths vs new cases in last two weeks',
        xlabel='Day',
        ylabel='Percent',
        ylim=(0.0, 6.0),
        figsize=(10, 10),
        grid=True)
    return percent.plot(**options)


#%% Confirmed cases day-by-day difference
def plot_confirmed_daily(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """Plot the row-to-row increase of the ``Confirmed`` column."""
    increase = data['Confirmed'].diff()
    return increase.plot()


#%% Covid infection waves
def plot_confirmed_waves(data:pd.DataFrame) -> matplotlib.axes.Axes:
    """Plot the weekly median of the row-to-row increase in ``Confirmed``."""
    increase = data['Confirmed'].diff()
    weekly_median = increase.resample('W').median()
    return weekly_median.plot()


#%% Confirmed cases in last two weeks
def plot_confirmed_last(data: pd.DataFrame, freq='2W') -> matplotlib.axes.Axes:
    """
    Plot the row-to-row increase of ``Confirmed`` over the final ``freq`` period.

    ``Series.last()`` is deprecated in recent pandas releases, so the same
    selection is done explicitly: subtract the offset from the last index
    label and keep the rows that come after that cut-off.

    :param data: frame with a ``Confirmed`` column and a ``DatetimeIndex``
    :param freq: pandas offset alias for the trailing period (default ``'2W'``)
    :raises TypeError: if the index is not a ``DatetimeIndex``
    """
    # Local import keeps the block self-contained without touching the
    # file-level import list.
    from pandas.tseries.frequencies import to_offset

    series = data['Confirmed']
    if not isinstance(series.index, pd.DatetimeIndex):
        raise TypeError("'last' only supports a DatetimeIndex index")

    if len(series.index) == 0:
        recent = series
    else:
        cutoff = series.index[-1] - to_offset(freq)
        # side='right' excludes a row lying exactly on the cut-off,
        # mirroring what Series.last() did.
        first_row = series.index.searchsorted(cutoff, side='right')
        recent = series.iloc[first_row:]

    return recent.diff().plot()


#%% Confirmed cases every month
def plot_confirmed_monthly(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """
    Plot the number of newly confirmed cases in every calendar month.

    ``Confirmed`` is differenced first so that each month sums the
    row-to-row increases.  Summing the column directly would add up running
    totals (the doctests elsewhere in this file show ``Confirmed`` as an
    ever-growing total), which does not give cases per month.
    """
    new_cases = data['Confirmed'].diff()
    return new_cases.resample('M').sum().plot()


#%%
def plot_confirmed_after_holidays(
        data: pd.DataFrame,
        since: date | str | None = '2021-01-01',
        until: date | str | None = '2022-02-07',
        days: int = 14,
        calendar: AbstractHolidayCalendar = PLHolidayCalendar(),
    ) -> matplotlib.axes.Axes:
    """
    Plot the increase in confirmed cases in the ``days`` rows after each holiday.

    One subplot is drawn per holiday returned by
    ``calendar.holidays(since, until)``; every subplot shows the row-to-row
    difference of ``Confirmed`` starting at the holiday.

    :param data: frame with a ``Confirmed`` column, indexed by date
    :param since: first date considered when looking up holidays
    :param until: last date considered when looking up holidays
    :param days: number of rows taken from each holiday onwards
    :param calendar: holiday calendar that supplies the dates
    """
    def _window(start) -> pd.Series:
        # Positional index (0..days-1) lets all holidays share one x axis.
        return (data
                .loc[start:, 'Confirmed']
                .iloc[:days]
                .reset_index(drop=True))

    # A separate name is used instead of rebinding ``data``, which the
    # helper above still reads.
    windows = {holiday.strftime('%Y-%m-%d'): _window(holiday)
               for holiday in calendar.holidays(since, until)}

    # The grid gets one row per holiday; a fixed 15 rows only matches the
    # default date range.
    return pd.DataFrame(windows).diff().plot(
        kind='line',
        subplots=True,
        layout=(len(windows), 1),
        sharex=True,
        figsize=(5, 15),
        grid=True)


#%% Run
if __name__ == '__main__':
    # Per-country tables, built in the same order as before; only
    # ``poland`` is plotted below.
    poland, usa, france, china = (
        covid19(country) for country in ('Poland', 'US', 'France', 'China'))
    world = covid19()

    data = poland.loc['2020-01-01':'2022-02-01']

    plotters = (
        plot_trendline,
        plot_fatalities,
        plot_confirmed_daily,
        plot_confirmed_waves,
        plot_confirmed_last,
        plot_confirmed_monthly,
        plot_confirmed_after_holidays,
    )
    for draw in plotters:
        draw(data)
        # plt.show()



"""TODO:

# Resample
poland['Confirmed'].shift(periods=1, freq='D').plot(kind='line')


# Broken down by quarter (Q)
plot = poland['Confirmed'].resample('Q').plot(kind='line', legend=True)
plot[0].name = '2020-Q1'
plot[1].name = '2020-Q2'
plot[2].name = '2020-Q3'
plot[3].name = '2020-Q4'
plot[4].name = '2021-Q1'
plot[5].name = '2021-Q2'
plot[6].name = '2021-Q3'
plot[7].name = '2021-Q4'
plot[8].name = '2022-Q1'
plt.show()


# Macro trends
poland['Confirmed'].diff().rolling(window=14).median().plot()
plt.show()

poland['Confirmed'].diff().rolling(window=14).median().plot()
plt.show()

poland['Confirmed'].diff().resample('W').median().plot()
plt.show()

poland['Confirmed'].diff().resample('M').median().plot()
plt.show()

poland['Confirmed'].diff().resample('Q').median().plot()
plt.show()
"""
from datetime import date

import pandas as pd
from doctest import testmod as run_tests
from matplotlib import pyplot as plt
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, EasterMonday, Easter
from pandas.tseries.offsets import Day

# Multiplier applied by procent_smiertelnosci() ("procent" is Polish for
# "percent").
PROCENT = 1


#%%

# Time-series CSV files in the CSSEGISandData/COVID-19 GitHub repository.
CONFIRMED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

RECOVERED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

DEATHS = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'


# Each file is read from its URL when this cell/module is executed (network
# access required); convert_dtypes() then lets pandas pick column dtypes.
confirmed = pd.read_csv(CONFIRMED).convert_dtypes()
recovered = pd.read_csv(RECOVERED).convert_dtypes()
deaths = pd.read_csv(DEATHS).convert_dtypes()


class PLHolidayCalendar(AbstractHolidayCalendar):
    """
    Holiday rules for Poland, usable wherever pandas expects a calendar.

    The list of holidays follows
    https://en.wikipedia.org/wiki/Public_holidays_in_Poland

    >>> PLHolidayCalendar().holidays(start='2000-01-01', end='2000-12-31')
    DatetimeIndex(['2000-01-01', '2000-01-06', '2000-04-23', '2000-04-24',
                   '2000-05-01', '2000-05-03', '2000-06-11', '2000-06-22',
                   '2000-08-15', '2000-11-01', '2000-11-11', '2000-12-25',
                   '2000-12-26'],
                  dtype='datetime64[ns]', freq=None)
    """
    rules = [
        Holiday(name='New Years Day', month=1, day=1),
        Holiday(name='Epiphany', month=1, day=6),
        # Movable feasts start from 1 January and are shifted by the
        # Easter offset (plus a fixed number of days where needed).
        Holiday(name='Easter', month=1, day=1, offset=[Easter()]),
        EasterMonday,
        Holiday(name='May Day', month=5, day=1),
        Holiday(name='Constitution Day', month=5, day=3),
        Holiday(name='Pentecost Sunday', month=1, day=1,
                offset=[Easter(), Day(49)]),
        Holiday(name='Corpus Christi', month=1, day=1,
                offset=[Easter(), Day(60)]),
        Holiday(name='Assumption of the Blessed Virgin Mary',
                month=8, day=15),
        Holiday(name='All Saints Day', month=11, day=1),
        Holiday(name='Independence Day', month=11, day=11),
        Holiday(name='Christmas Day', month=12, day=25),
        Holiday(name='Second Day of Christmastide', month=12, day=26),
    ]


#%%

def _parse(data, country, name):
    """
    Sum the date columns of ``data`` into a one-column frame called ``name``.

    When ``country`` is not ``None`` only rows whose ``Country/Region``
    equals it are included.  The result is indexed by timestamps parsed
    from the column labels.

    >>> _parse(confirmed, 'Poland', name='confirmed').loc['2021-08-04']
    confirmed    2883448
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> _parse(confirmed, 'Poland', name='confirmed').loc['2021-08-05']
    confirmed    2883624
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> _parse(recovered, 'Poland', name='recovered').loc['2021-08-04']
    recovered    2653981
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> _parse(recovered, 'Poland', name='recovered').loc['2021-08-05']
    recovered    0
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> _parse(deaths, 'Poland', name='deaths').loc['2021-08-04']
    deaths    75269
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> _parse(deaths, 'Poland', name='deaths').loc['2021-08-05']
    deaths    75275
    Name: 2021-08-05 00:00:00, dtype: int64
    """
    selected = data
    if country is not None:
        selected = data.loc[data['Country/Region'] == country]

    # After transposing, the original columns become rows; the first four
    # are skipped (presumably the non-date metadata columns).
    per_date = selected.transpose().iloc[4:]
    totals = per_date.sum(axis='columns').astype('int')

    frame = totals.to_frame()
    frame.columns = [name]
    return frame.rename(pd.to_datetime, axis='index')

# Run the doctests collected so far (run_tests is doctest.testmod).
run_tests()

#%%
def get(country=None):
    """
    Combine confirmed, recovered and deaths totals into one frame.

    Each column comes from ``_parse`` applied to the matching module-level
    frame; ``country=None`` sums over every row.

    >>> get('Poland').loc['2021-08-04']
    confirmed    2883448
    recovered    2653981
    deaths         75269
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> get('Poland').loc['2021-08-05']
    confirmed    2883624
    recovered          0
    deaths         75275
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> get('United Kingdom').loc['2021-08-04']
    confirmed    5980830
    recovered      24693
    deaths        157209
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> get('United Kingdom').loc['2021-08-05']
    confirmed    6010860
    recovered          0
    deaths        157314
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> get().loc['2021-08-04']
    confirmed    200755908
    recovered    130899061
    deaths         4283088
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> get().loc['2021-08-05']
    confirmed    201441516
    recovered            0
    deaths         4294079
    Name: 2021-08-05 00:00:00, dtype: int64
    """
    labelled = (
        ('confirmed', confirmed),
        ('recovered', recovered),
        ('deaths', deaths),
    )
    columns = [_parse(frame, country, name=label)
               for label, frame in labelled]
    return pd.concat(columns, axis='columns')

# Run the doctests again now that get() is defined as well.
run_tests()

#%%

# Per-country tables built once for the analyses below.
poland = get('Poland')
germany = get('Germany')
india = get('India')

uk = get('United Kingdom')
france = get('France')
china = get('China')

# No country filter: totals over every row of the source data.
world = get()


#%%

def liczba_potwierdzonych_oraz_smierci_w_tygodniowych_okresach():
    """
    Return the worldwide number of new confirmed cases and deaths per week.

    The ``world`` columns are differenced first so that every week sums the
    row-to-row increases.  Summing the columns directly would add up
    running totals (the doctests of ``get`` show them as ever-growing
    totals), which does not give a weekly count.
    """
    cumulative = world.loc[:, ['confirmed', 'deaths']]
    weekly = cumulative.diff().resample('W').sum()
    # diff() turns the values into floats; the sums hold no NaN, so they
    # can go back to integers.
    return weekly.astype('int64')

def liczba_zachorowan_na_jeden_przypadek_smiertelny():
    """Return worldwide confirmed cases divided by deaths, row by row."""
    cases = world['confirmed']
    fatalities = world['deaths']
    return cases / fatalities

def procent_smiertelnosci():
    """Return worldwide deaths as a percentage of confirmed cases, row by row."""
    ratio = world['deaths'] / world['confirmed']
    return ratio * 100 * PROCENT

def get_holidays(year: int, calendar: AbstractHolidayCalendar) -> pd.DatetimeIndex:
    """Return the holidays of ``calendar`` between 1 January and 31 December of ``year``."""
    first_day = date(year, 1, 1)
    last_day = date(year, 12, 31)
    return calendar.holidays(start=first_day, end=last_day)

def liczba_zachorowan_po_swietach(year, calendar=PLHolidayCalendar()):
    """
    Return the increases in confirmed cases in Poland after each holiday of ``year``.

    Only holidays earlier than the current moment are used.  Every column
    is named after a holiday date (``YYYY-MM-DD``) and holds the
    row-to-row differences of ``poland['confirmed']`` for the rows that
    follow that date.

    >>> data = liczba_zachorowan_po_swietach(year=2022)
    >>> plot = data.plot(
    ...    kind='line',
    ...    subplots=True,
    ...    sharey=True,
    ...    sharex=True,
    ...    grid=True,
    ...    figsize=(10,20))
    >>> # plt.show()
    """
    now = pd.Timestamp('today')
    all_holidays = get_holidays(year, calendar)
    past_holidays = all_holidays[all_holidays < now]

    def increments_after(holiday, days=10):
        window = poland.loc[holiday:, 'confirmed'].iloc[:days]
        changes = window.diff().reset_index(drop=True)
        # The first difference is always missing, so it is dropped before
        # casting back to integers.
        return changes.iloc[1:].astype('int')

    return pd.DataFrame({
        holiday.strftime('%Y-%m-%d'): increments_after(holiday)
        for holiday in past_holidays
    })

7.2.2. Plots

../../_images/covid19-poland-confirmed-daily.png

Figure 7.19. Confirmed daily plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-holidays.png

Figure 7.20. Confirmed holidays plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-last.png

Figure 7.21. Confirmed last plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-monthly.png

Figure 7.22. Confirmed monthly plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-waves.png

Figure 7.23. Confirmed waves plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-fatalities.png

Figure 7.24. Fatalities plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-trendline.png

Figure 7.25. Trendline plot for the COVID-19 pandemic in Poland.