Skip to content

plot_hiv_by_age_and_gender

create_title(base_title='', node_id=None, gender=None, show_avg_per_run=False, show_fraction=False, show_fraction_of=False, fraction_of_str='', hiv_negative=False, has_age_bins=False)

Use the input arguments to create a title for the plot (and filename).

Parameters:

Name Type Description Default
base_title str

This is the core string to place in the title. It describes the specific data being plotted.

''
node_id int

The ID of the node for which the data is being filtered for.

None
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
show_avg_per_run bool

True indicates that the data is an average over multiple runs.

False
show_fraction bool

True indicates that the data is not true counts but a fraction (i.e. a count divided by another counter)

False
show_fraction_of bool

When the denominator of the fraction can be different things, say population or infected, this allows you to specify the option that is not population.

False
fraction_of_str str

If 'show_fraction_of' is true, then this argument can be used to include in the plot what the denominator is.

''
hiv_negative bool

If True, then the title will indicate that the data is for people without HIV.

False
has_age_bins bool

If True, then 'by Age' is added to the title.

False

Returns:

Type Description
str

A string to be used as the top line title of the plot.

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def create_title(base_title: str = "",
                 node_id: int = None,
                 gender: str = None,
                 show_avg_per_run: bool = False,
                 show_fraction: bool = False,
                 show_fraction_of: bool = False,
                 fraction_of_str: str = "",
                 hiv_negative: bool = False,
                 has_age_bins: bool = False):
    """
    Build the top-line title of a plot (also used for the filename) from the filter options.

    The title is assembled left to right as:
    prefix, optional denominator, population noun, optional " Without HIV",
    optional base title, optional " by Age", optional node suffix, optional " Per Run".

    Args:
        base_title (str, optional):
            The core string of the title describing the specific data being plotted.
            When non-empty it is added after the population noun, preceded by a space.

        node_id (int, optional):
            The ID of the node the data is filtered for.  When not None,
            " - Node <node_id>" is added to the title.

        gender (str, optional):
            "Female" produces "Females", "Male" produces "Males"; any other value
            (including None) produces "People".

        show_avg_per_run (bool, optional):
            True indicates that the data is an average over multiple runs.
            When False, " Per Run" is appended to the end of the title.

        show_fraction (bool, optional):
            True indicates that the data is a fraction (a count divided by another
            count) instead of a true count.

        show_fraction_of (bool, optional):
            Only considered when 'show_fraction' is True.  When True, 'fraction_of_str'
            is inserted directly before the population noun.

        fraction_of_str (str, optional):
            The text naming the denominator of the fraction.  It is inserted without
            any separator, so it must carry its own trailing space if one is wanted.

        hiv_negative (bool, optional):
            If True, " Without HIV" is added after the population noun.

        has_age_bins (bool, optional):
            If True, " by Age" is added to the title.

    Returns:
        (str): A string to be used as the top line title of the plot.
    """
    # Leading phrase selected by the (average, fraction) flag combination.
    prefix_by_flags = {
        (False, False): "",
        (True, False): "Average Per Run ",
        (False, True): "Fraction of ",
        (True, True): "Average Fraction of ",
    }
    pieces = [prefix_by_flags[(bool(show_avg_per_run), bool(show_fraction))]]

    if show_fraction and show_fraction_of:
        pieces.append(fraction_of_str)

    if gender == "Female":
        pieces.append("Females")
    elif gender == "Male":
        pieces.append("Males")
    else:
        pieces.append("People")

    if hiv_negative:
        pieces.append(" Without HIV")
    if base_title:
        pieces.append(" " + base_title)
    if has_age_bins:
        pieces.append(" by Age")
    if node_id is not None:
        pieces.append(" - Node " + str(node_id))
    if not show_avg_per_run:
        pieces.append(" Per Run")

    return "".join(pieces)

create_y_axis_name(base_title='', node_id=None, gender=None, show_avg_per_run=False, show_fraction=False, show_fraction_of=False, fraction_of_str='', has_age_bins=False)

Given the arguments, create a label for the y-axis that describes the data being plotted.

Parameters:

Name Type Description Default
base_title str

This is the core string to place in the y-label. It describes what is being plotted.

''
node_id int

Currently unused by this function.

None
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
show_avg_per_run bool

Currently unused by this function.

False
show_fraction bool

True indicates that the data is not true counts but a fraction (i.e. a count divided by another counter)

False
show_fraction_of bool

When the denominator of the fraction can be different things, say population or infected, this allows you to specify the option that is not population.

False
fraction_of_str str

If 'show_fraction_of' is true, then this argument can be used to include in the plot what the denominator is.

''
has_age_bins bool

Currently unused by this function.

False

Returns:

Type Description
str

A string to be used as the y-axis label of the plot.

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def create_y_axis_name(base_title: str = "",
                       node_id: int = None,
                       gender: str = None,
                       show_avg_per_run: bool = False,
                       show_fraction: bool = False,
                       show_fraction_of: bool = False,
                       fraction_of_str: str = "",
                       has_age_bins: bool = False):
    """
    Build the y-axis label describing the data being plotted.

    The label is "<Number of |Fraction of ><denominator><population noun><base_title>".

    Args:
        base_title (str, optional):
            The core string describing what is being plotted.  It is appended directly
            after the population noun without a separator, so it must carry its own
            leading space if one is wanted.

        node_id (int, optional):
            Currently unused by this function.

        gender (str, optional):
            "Female" produces "Females", "Male" produces "Males"; any other value
            (including None) produces "People".

        show_avg_per_run (bool, optional):
            Currently unused by this function.

        show_fraction (bool, optional):
            True indicates that the data is a fraction (a count divided by another
            count), so the label starts with "Fraction of " instead of "Number of ".

        show_fraction_of (bool, optional):
            Only considered when 'show_fraction' is True.  When True, 'fraction_of_str'
            is inserted directly before the population noun.

        fraction_of_str (str, optional):
            The text naming the denominator of the fraction.  It is inserted without
            any separator.

        has_age_bins (bool, optional):
            Currently unused by this function.

    Returns:
        (str): A string to be used as the y-axis label of the plot.
    """
    if show_fraction:
        lead = "Fraction of "
        if show_fraction_of:
            lead = lead + fraction_of_str
    else:
        lead = "Number of "

    if gender == "Female":
        noun = "Females"
    elif gender == "Male":
        noun = "Males"
    else:
        noun = "People"

    return lead + noun + base_title

extract_population_data(filename, node_id=None, gender=None, age_bin=None, other_strat_column_name=None, other_strat_value=None)

Extract population data for a specific node, gender and age.

It is assumed that the file has ages every 5 years from 0 to 100.

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def extract_population_data(filename: str,
                            node_id: int = None,
                            gender: str = None,
                            age_bin: float = None,
                            other_strat_column_name: str = None,
                            other_strat_value: Union[int, float, str] = None):
    """
    Extract population over time for a specific node, gender, age and/or one other stratification.

    The CSV file is pivoted by year, summing the population column, and the single
    series matching the requested filters is returned.  Filters left as None are
    summed over.  It is assumed that the file has ages every 5 years from 0 to 100.

    Args:
        filename (str, required):
            The name and path of the CSV file to read.

        node_id (int, optional):
            The ID of the node to filter for.

        gender (str, optional):
            "Male" selects gender ID 0; any other non-None value selects gender ID 1.

        age_bin (float, optional):
            The age value to filter for.  A value of 80 is treated as 80+ and sums the
            80, 85, 90, 95 and 100 age values.

        other_strat_column_name (str, optional):
            The name of one additional column to filter on.  Must be given together
            with 'other_strat_value'.

        other_strat_value (int, float or str, optional):
            The value to select in 'other_strat_column_name'.

    Returns:
        (pd.DataFrame): A dataframe indexed by the year values of the file with one
        population column holding the selected population.

    Raises:
        ValueError: If the population column or 'other_strat_column_name' is not in
            the file, or if only one of 'other_strat_column_name' and
            'other_strat_value' is specified.
    """
    df = pd.read_csv(filename)

    # -------------------------------------------
    # Verify the CSV file has the correct columns
    # -------------------------------------------
    if COL_NAME_POP not in df.columns:
        raise ValueError(f"'{COL_NAME_POP}' column does not exist in the file({filename}).")
    if other_strat_column_name and (other_strat_column_name not in df.columns):
        raise ValueError(f"'{other_strat_column_name}' column does not exist in the file({filename}).")

    # Exactly one of the pair being None means the pair is incomplete.
    if (other_strat_column_name is None) != (other_strat_value is None):
        raise ValueError("Both 'other_strat_column_name' and 'other_strat_value' must be specified.")

    gender_id = 0 if gender == "Male" else 1

    # ---------------------------------------------------------------
    # Determine which columns to put in the pivot table and, in the
    # same order, the values that select the wanted pivot column.
    # ---------------------------------------------------------------
    pv_columns = []
    key_prefix = []
    if node_id is not None:
        # The node filter must pivot on the node ID column so that the
        # 'node_id' value in the lookup key matches a column level.
        pv_columns.append(COL_NAME_NODE_ID)
        key_prefix.append(node_id)
    if gender is not None:
        pv_columns.append(COL_NAME_GENDER)
        key_prefix.append(gender_id)
    if other_strat_column_name is not None:
        pv_columns.append(other_strat_column_name)
        key_prefix.append(other_strat_value)
    if age_bin is not None:
        pv_columns.append(COL_NAME_AGE)

    pv = df.pivot_table(index=COL_NAME_YEAR,
                        columns=pv_columns,
                        values=COL_NAME_POP,
                        aggfunc="sum")

    def _select(parts):
        # A single pivot level is addressed by a scalar, several levels by a tuple.
        key = parts[0] if len(parts) == 1 else tuple(parts)
        return pv[key]

    # ------------------------------------------------------------
    # Extract data from pivot table and put in dataframe.
    # If the input age_bin is 80, we need to include the data from
    # the older ages because 80 for UN World Pop is really 80+.
    # ------------------------------------------------------------
    df2 = pd.DataFrame()
    df2.index = pv.index.values

    if age_bin is not None:
        df2[COL_NAME_POP] = 0
        ages_to_sum = [80, 85, 90, 95, 100] if age_bin == 80 else [age_bin]
        for age in ages_to_sum:
            df2[COL_NAME_POP] = df2[COL_NAME_POP] + _select(key_prefix + [age])
    elif key_prefix:
        df2[COL_NAME_POP] = _select(key_prefix)
    else:
        # No filters at all: the pivot has only the summed population column.
        df2[COL_NAME_POP] = pv[COL_NAME_POP]

    return df2

extract_population_data_by_stratification(filename, node_id=None, gender=None, age_bin_list=None, start_column_name=None, strat_values=None)

Extract population data such that you get the population for each stratification.

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def extract_population_data_by_stratification(filename: str,
                                              node_id: int = None,
                                              gender: str = None,
                                              age_bin_list: list[float] = None,
                                              start_column_name: str = None,
                                              strat_values: list[str] = None):
    """
    Extract population data such that you get the population for each stratification.

    The CSV file is pivoted by year, summing the population column, and one output
    column is produced per stratification value (and per age bin when
    'age_bin_list' is given).

    Args:
        filename (str, required):
            The name and path of the CSV file to read.

        node_id (int, optional):
            The ID of the node to filter for.

        gender (str, optional):
            "Male" selects gender ID 0; any other non-None value selects gender ID 1.

        age_bin_list (list[float], optional):
            Bin edges in years; consecutive values define the bins [min, max).
            Must contain at least two values when given.  Output columns are then
            named "<strat_value>:<min> - <max>".

        start_column_name (str, required in practice):
            The name of the column to stratify by.  A ValueError is raised when it
            is not a column of the file (which includes the default of None).

        strat_values (list, optional):
            The values of 'start_column_name' to extract.  When None, all unique
            non-NaN values found in the file are used.

    Returns:
        (pd.DataFrame): A dataframe indexed by the year values of the file with one
        column per stratification value (and age bin).

    Raises:
        ValueError: If 'age_bin_list' has fewer than two values or a needed column
            does not exist in the file.
    """
    if age_bin_list is not None and len(age_bin_list) < 2:
        raise ValueError("'age_bin_list' must have at least two values.\n"
                         + "The second value is the max of the i-th bin and the min of the (i+1)-th bin.")

    df = pd.read_csv(filename)

    # --------------------------------------------
    # Verify the CSV file has the expected columns
    # --------------------------------------------
    if COL_NAME_POP not in df.columns:
        raise ValueError(f"'Population' column does not exist in the file({filename}).")
    if start_column_name not in df.columns:
        raise ValueError(f"'{start_column_name}' column does not exist in the file({filename}).")

    if strat_values is None:
        strat_values = df[start_column_name].unique()
        strat_values = [x for x in strat_values if str(x) != 'nan']

    # -----------------------------------------------------
    # Determine what data to put in to the pivot table and
    # create pivot table
    # -----------------------------------------------------
    pv_columns = [start_column_name]
    age_data_list = []
    if node_id is not None:
        pv_columns.append(COL_NAME_NODE_ID)
    if gender is not None:
        pv_columns.append(COL_NAME_GENDER)
    if age_bin_list is not None:
        pv_columns.append(COL_NAME_AGE)
        age_data_list = df[COL_NAME_AGE].unique()

    gender_id = 0 if gender == "Male" else 1

    pv = df.pivot_table(index=COL_NAME_YEAR,
                        columns=pv_columns,
                        values=COL_NAME_POP,
                        aggfunc="sum")

    # -------------------------------------------------------
    # Move the data from the pivot table to a new dataframe.
    # If doing ages, we need to create labels for the columns
    # that includes the age ranges.
    # -------------------------------------------------------
    df2 = pd.DataFrame()
    df2.index = pv.index.values

    for strat_value in strat_values:
        # Values selecting the pivot column, in the same order as pv_columns.
        key_prefix = [strat_value]
        if node_id is not None:
            key_prefix.append(node_id)
        if gender is not None:
            key_prefix.append(gender_id)

        if age_bin_list:
            for age_min, age_max in zip(age_bin_list, age_bin_list[1:]):
                # str() so that numeric stratification values (e.g. 0/1 flags
                # discovered via unique()) can be used to build the label.
                column_label = str(strat_value) + ":" + str(age_min) + " - " + str(age_max)
                df2[column_label] = 0
                for age in age_data_list:
                    if (age_min <= age) and (age < age_max):
                        column_tuple = tuple(key_prefix + [float(age)])
                        df2[column_label] = df2[column_label] + pv[column_tuple]
        else:
            # A single pivot level is addressed by a scalar, several levels by a tuple.
            column_tuple = key_prefix[0] if len(key_prefix) == 1 else tuple(key_prefix)
            df2[strat_value] = pv[column_tuple]

    return df2

extract_population_data_multiple_ages(filename, node_id=None, gender=None, age_bin_list=None, filter_by_hiv_negative=False, other_strat_column_name=None, other_strat_value=None, other_data_column_names=None)

Extract population data for multiple ages for a specific node and gender.

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def extract_population_data_multiple_ages(filename: str,
                                          node_id: int = None,
                                          gender: str = None,
                                          age_bin_list: list[float] = None,
                                          filter_by_hiv_negative: bool = False,
                                          other_strat_column_name: str = None,
                                          other_strat_value: Union[int, float, str] = None,
                                          other_data_column_names: list[str] = None):
    """
    Extract population (and optionally other data columns) per age bin for a node and gender.

    The CSV file is pivoted by year, summing the requested data columns, and the
    series matching the requested filters are copied into the returned dataframe.

    Args:
        filename (str, required):
            The name and path of the CSV file to read.

        node_id (int, optional):
            The ID of the node to filter for.

        gender (str, optional):
            "Male" selects gender ID 0; any other non-None value selects gender ID 1.

        age_bin_list (list[float], optional):
            Bin edges in years; consecutive values define the bins [min, max).
            Output columns are then named "<column>:<min> - <max>".

        filter_by_hiv_negative (bool, optional):
            If True and the file has the has-HIV column, only rows where that
            column equals 0 are used.

        other_strat_column_name (str, optional):
            The name of one additional column to filter on.  Must be given together
            with 'other_strat_value'.

        other_strat_value (int, float or str, optional):
            The value to select in 'other_strat_column_name'.

        other_data_column_names (list[str], optional):
            Additional data columns to sum and return alongside the population.

    Returns:
        (pd.DataFrame, bool): The extracted data indexed by the year values of the
        file, and a flag that is True only when the HIV-negative filter was applied.

    Raises:
        ValueError: If a needed column does not exist in the file, or if only one of
            'other_strat_column_name' and 'other_strat_value' is specified.
    """
    report = pd.read_csv(filename)

    # -----------------------------------------
    # Verify the report had the correct columns
    # -----------------------------------------
    if COL_NAME_POP not in report.columns:
        raise ValueError(f"'Population' column does not exist in the file({filename}).")
    if other_strat_column_name is not None and other_strat_column_name not in report.columns:
        raise ValueError(f"'{other_strat_column_name}' column does not exist in the file({filename}).")

    # Exactly one of the pair being None means the pair is incomplete.
    if (other_strat_column_name is None) != (other_strat_value is None):
        raise ValueError("Both 'other_strat_column_name' and 'other_strat_value' must be specified.")

    if other_data_column_names is None:
        other_data_column_names = []
    has_extra_columns = len(other_data_column_names) > 0

    gender_id = 0 if gender == "Male" else 1

    # -----------------------------------------------------
    # Pivot levels and, in the same order, the values that
    # select the wanted pivot column.
    # -----------------------------------------------------
    pivot_levels = []
    key_prefix = []
    if node_id is not None:
        pivot_levels.append(COL_NAME_NODE_ID)
        key_prefix.append(node_id)
    if gender is not None:
        pivot_levels.append(COL_NAME_GENDER)
        key_prefix.append(gender_id)
    if other_strat_column_name is not None:
        pivot_levels.append(other_strat_column_name)
        key_prefix.append(other_strat_value)

    # The ages are collected before any HIV filtering is applied.
    ages_in_file = []
    if age_bin_list is not None:
        pivot_levels.append(COL_NAME_AGE)
        ages_in_file = report[COL_NAME_AGE].unique()

    hiv_negative_applied = False
    if filter_by_hiv_negative and COL_NAME_HAS_HIV in report.columns:
        report = report[report[COL_NAME_HAS_HIV] == 0]
        hiv_negative_applied = True

    # With extra data columns the pivot gets an additional top level holding
    # the data column name; with population only that level does not exist.
    if has_extra_columns:
        pivot_values = [COL_NAME_POP]
        for extra_name in other_data_column_names:
            if extra_name is not None and extra_name not in report.columns:
                raise ValueError(f"'{extra_name}' column does not exist in the file({filename}).")
            pivot_values.append(extra_name)
    else:
        pivot_values = COL_NAME_POP

    pv = report.pivot_table(index=COL_NAME_YEAR,
                            columns=pivot_levels,
                            values=pivot_values,
                            aggfunc="sum")

    def _pivot_key(data_name, trailing):
        # Assemble the pivot column key: optional data-name level, the filter
        # values, then any trailing values.  One part -> scalar, else a tuple.
        parts = [data_name] if has_extra_columns else []
        parts.extend(key_prefix)
        parts.extend(trailing)
        if len(parts) == 1:
            return parts[0]
        return tuple(parts)

    # -------------------------------------------------------
    # Move the data from the pivot table to a new dataframe.
    # -------------------------------------------------------
    result = pd.DataFrame()
    result.index = pv.index.values

    output_names = [COL_NAME_POP] + [name for name in other_data_column_names]

    if age_bin_list:
        for age_min, age_max in zip(age_bin_list, age_bin_list[1:]):
            bin_suffix = ":" + str(age_min) + " - " + str(age_max)
            for data_name in output_names:
                result[data_name + bin_suffix] = 0
            for age in ages_in_file:
                if (age_min <= age) and (age < age_max):
                    for data_name in output_names:
                        target = data_name + bin_suffix
                        result[target] = result[target] + pv[_pivot_key(data_name, [float(age)])]
    else:
        for data_name in output_names:
            if len(key_prefix) > 0:
                source = _pivot_key(data_name, [])
            else:
                source = data_name
            result[data_name] = pv[source]

    return result, hiv_negative_applied

plot_circumcision_by_age(filename, age_bin_list, fraction_of_total=False, img_dir=None)

For a single file, plot the number of men who are circumcised by age.

Parameters:

Name Type Description Default
filename (str, required)

The name and path of the ReportHIVByAgeAndGender.csv to extract the data from.

required
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

required
fraction_of_total bool

If True, the number of men who are circumcised will be divided by the total number of men with that stratification.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_circumcision_by_age(filename: str,
                             age_bin_list: list[float],
                             fraction_of_total: bool = False,
                             img_dir: str = None):
    """
    For a single file, plot the number of men who are circumcised by age.

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv to extract the data from.

        age_bin_list (list[float], required):
            A list of ages in years where the circumcised men will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55)

        fraction_of_total (bool, optional):
            If True, the number of men who are circumcised will be divided by the total number
            of men in that age bin.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If 'filename' is not a file or a needed column does not exist in the file.
    """
    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")

    df = pd.read_csv(filename)

    # Every column used by the pivot below must be present.
    for required_name in (COL_NAME_GENDER, COL_NAME_AGE, COL_NAME_YEAR, COL_NAME_IS_CIRC, COL_NAME_POP):
        if required_name not in df.columns:
            raise ValueError(f"'{required_name}' column does not exist in the file({filename}).")

    ages = df[COL_NAME_AGE].unique()

    y_axis_name = "Number of Circumcised Men"
    if fraction_of_total:
        y_axis_name = "Fraction of Men Circumcised by Age"

    pv = df.pivot_table(index=COL_NAME_YEAR,
                        columns=[COL_NAME_GENDER, COL_NAME_AGE, COL_NAME_IS_CIRC],
                        values=COL_NAME_POP,
                        aggfunc="sum")

    df2 = pd.DataFrame()
    df2.index = pv.index

    male_id = 0
    for age_min, age_max in zip(age_bin_list, age_bin_list[1:]):
        label = f"Men Circumcised: {age_min}-{age_max}"
        # Accumulate in local series so the pivot table is left untouched.
        circumcised = pd.Series(0, index=pv.index)
        total = pd.Series(0, index=pv.index)
        for age in ages:
            if (age_min <= age) and (age < age_max):
                not_circ_at_age = pv[(male_id, age, 0)]
                circ_at_age = pv[(male_id, age, 1)]
                total = total + not_circ_at_age + circ_at_age
                circumcised = circumcised + circ_at_age
        if fraction_of_total:
            df2[label] = circumcised / total
        else:
            df2[label] = circumcised

    xy_plot.xy_plot(img_dir=img_dir,
                    df=df2,
                    title_1="Circumcised Men by Age",
                    title_2=None,
                    y_axis_name=y_axis_name,
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)

plot_columns(filename, title, y_axis_name, column_names, fraction_of_population=False, img_dir=None)

For a given file, plot the indicated columns versus time (i.e. Year).

Parameters:

Name Type Description Default
filename (str, required)

The name and path of the ReportHIVByAgeAndGender.csv file to extract the data from.

required
title (str, required)

The title to put at the top of the plot.

required
y_axis_name (str, required)

The name to label the y-axis on the plot.

required
column_names (list[str], required)

The list of column names to plot the data for. The report has a space before each column name. Please be sure to include it.

required
fraction_of_population bool

If True, divide the count in each column by the population for the same stratification.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_columns(filename: str,
                 title: str,
                 y_axis_name: str,
                 column_names: list[str],
                 fraction_of_population: bool = False,
                 img_dir: str = None):
    """
    For a given file, plot the indicated columns versus time (i.e. Year).

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv file to extract the data from.

        title (str, required):
            The title to put at the top of the plot.

        y_axis_name (str, required):
            The name to label the y-axis on the plot.

        column_names (list[str], required):
            The list of column names to plot the data for.  The report has a space before each
            column name.  Please be sure to include it.  The list is not modified.

        fraction_of_population (bool, optional):
            If True, divide the count in each column by the population for the same year.
            The population column itself is not plotted in this case.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If 'filename' is not a file or a needed column does not exist in the file.
    """
    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")

    df = pd.read_csv(filename)

    for name in column_names:
        if name not in df.columns:
            raise ValueError(f"'{name}' column does not exist in the file({filename}).")

    # Work on a copy so the caller's list is never changed by this function.
    value_columns = list(column_names)
    if fraction_of_population:
        if COL_NAME_POP not in df.columns:
            raise ValueError(f"'{COL_NAME_POP}' column does not exist in the file({filename}).")
        if COL_NAME_POP not in value_columns:
            value_columns.append(COL_NAME_POP)

    pv = df.pivot_table(index=COL_NAME_YEAR,
                        columns=[],
                        values=value_columns,
                        aggfunc="sum")

    df2 = pd.DataFrame()
    df2.index = pv.index
    for value in column_names:
        if not fraction_of_population:
            df2[value] = pv[value]
        elif value != COL_NAME_POP:
            # The population is the denominator, so it is not plotted itself.
            df2[value] = pv[value] / pv[COL_NAME_POP]

    xy_plot.xy_plot(img_dir=img_dir,
                    df=df2,
                    title_1=title,
                    title_2=None,
                    y_axis_name=y_axis_name,
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)

plot_onART_by_age(dir_or_filename, exp_dir_or_filename=None, node_id=None, gender=None, age_bin_list=None, show_avg_per_run=False, show_fraction=False, fraction_of_infected=False, img_dir=None)

Create a plot showing information about the people on ART. You can show the fraction of the population or the fraction of the infected population.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
exp_dir_or_filename (str, required)

The expected or alternate directory or filename containing the ReportHIVByAgeAndGender.csv files.

None
node_id int

The ID of the node for which the data is being filtered for.

None
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
show_fraction bool

True indicates that the number of people on ART will be divided by either the number of people in the population or the number of infected people. It depends on the 'fraction_of_infected' parameter.

False
fraction_of_infected bool

If 'show_fraction' is True, then this parameter determines what the divisor is when creating the fraction. If it is True, it will divide the number of people on ART by the number of infected people. If it is False, the divisor will be the total population with that stratification.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_onART_by_age(dir_or_filename: str,
                      exp_dir_or_filename: str = None,
                      node_id: int = None,
                      gender: str = None,
                      age_bin_list: list[float] = None,
                      show_avg_per_run: bool = False,
                      show_fraction: bool = False,
                      fraction_of_infected: bool = False,
                      img_dir: str = None):
    """
    Plot the people on ART over time, either as counts or as a fraction of the
    population or of the infected population.

    This is a thin wrapper that forwards everything to 'base_plot_by_age' using the
    "On ART" column as the main data and the infected column as the optional divisor.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.  Default is None.

        node_id (int, optional):
            The ID of the node that the data is filtered for.

        gender (str, optional):
            The gender (Male or Female) that the data is filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, average the values at each time step
            over the files.  Default is False.

        show_fraction (bool, optional):
            True indicates that the number of people on ART is divided by either the
            number of people in the population or the number of infected people,
            depending on 'fraction_of_infected'.

        fraction_of_infected (bool, optional):
            Only relevant when 'show_fraction' is True.  If True, the divisor is the
            number of infected people.  If False, the divisor is the total population
            with that stratification.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None
    """
    # Which data to read and how to filter/stratify it
    data_selection = dict(dir_or_filename=dir_or_filename,
                          exp_dir_or_filename=exp_dir_or_filename,
                          node_id=node_id,
                          gender=gender,
                          age_bins=age_bin_list,
                          show_avg_per_run=show_avg_per_run)

    # How the optional fraction is formed: the alternative divisor is the infected column
    fraction_settings = dict(show_fraction=show_fraction,
                             fraction_of=fraction_of_infected,
                             fraction_of_column_name=COL_NAME_INFECTED,
                             fraction_of_str="Infected ")

    base_plot_by_age(base_title="On ART",
                     main_column_name=COL_NAME_ON_ART,
                     img_dir=img_dir,
                     **data_selection,
                     **fraction_settings)

plot_population_by_age(dir_or_filename, exp_dir_or_filename=None, node_id=None, gender=None, age_bin_list=None, show_avg_per_run=False, img_dir=None)

Create a plot showing the population over time.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
exp_dir_or_filename (str, optional)

The expected or alternate directory or filename containing the ReportHIVByAgeAndGender.csv files.

None
node_id int

The ID of the node for which the data is being filtered for.

None
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_population_by_age(dir_or_filename: str,
                           exp_dir_or_filename: str = None,
                           node_id: int = None,
                           gender: str = None,
                           age_bin_list: list[float] = None,
                           show_avg_per_run: bool = False,
                           img_dir: str = None):
    """
    Plot the population over time, optionally broken up into age bins.

    The work is delegated to 'base_plot_by_age' with the population column as the
    main data and all fraction options turned off.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.  Default is None.

        node_id (int, optional):
            The ID of the node that the data is filtered for.

        gender (str, optional):
            The gender (Male or Female) that the data is filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55).
            Use None (not an empty list) when age bins are not wanted.

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, average the values at each time step
            over the files.  Default is False.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If 'age_bin_list' is given but has no entries.
    """
    # An empty list is rejected explicitly; None is the way to say "no age bins"
    bins_given_but_empty = (age_bin_list is not None) and (len(age_bin_list) == 0)
    if bins_given_but_empty:
        message = ("The 'age_bin_list' parameter must be a list of ages in years where the population will be counted for each bin. "
                   "If you do not want to use age bins, please set it to None.")
        raise ValueError(message)

    # Population plots never show fractions, so the fraction options are fixed
    no_fraction_settings = dict(show_fraction=False,
                                fraction_of=False,
                                fraction_of_column_name=None,
                                fraction_of_str=None)

    base_plot_by_age(base_title="",
                     main_column_name=COL_NAME_POP,
                     dir_or_filename=dir_or_filename,
                     exp_dir_or_filename=exp_dir_or_filename,
                     node_id=node_id,
                     gender=gender,
                     age_bins=age_bin_list,
                     show_avg_per_run=show_avg_per_run,
                     img_dir=img_dir,
                     **no_fraction_settings)

plot_population_by_age_vs_unworld_pop(filename, unworld_pop_filename, age_bin_list, country, version, x_base_population=1.0, img_dir=None)

For a single file, plot the actual population for a given age bin versus what was expected in the given UN World Population file. If you have defined three age bins, there will be six curves - an actual and an expected for each bin.

Parameters:

Name Type Description Default
filename (str, required)

The name and path of the ReportHIVByAgeAndGender.csv file to plot the data from.

required
unworld_pop_filename (str, required)

The name and path to a UN World Pop Excel spreadsheet where the 'country' parameter specifies a country name found in the spreadsheet and the 'version' specifies the year of the data. These values are needed to know how to read the data in the spreadsheet.

required
age_bin_list (list[float], required)

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

required
country (str, required)

The name of the country found in the spreadsheet to extract the data for.

required
version (str, required)

The year associated with when the UN World Pop file was created. PLEASE NOTE: This year is a string.

required
x_base_population float

The 'x_Base_Population' value (found in the config) is used to divide by the population numbers in the CSV file so you get numbers that match the true population.

1.0
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_population_by_age_vs_unworld_pop(filename: str,
                                          unworld_pop_filename: str,
                                          age_bin_list: list[float],
                                          country: str,
                                          version: str,
                                          x_base_population: float = 1.0,
                                          img_dir=None):
    """
    For a single report file, plot the simulated population of each age bin against the
    population expected from a UN World Population file.  With three age bins there
    are six curves - an actual and an expected one per bin.

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv file to plot the data from.

        unworld_pop_filename (str, required):
            The name and path to a UN World Pop Excel spreadsheet.  The 'country' and
            'version' parameters are passed along with it so the spreadsheet can be read.

        age_bin_list (list[float], required):
            A list of ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55).
            The expected data is looked up by the string form of each bin's lower age.

        country (str, required):
            The name of the country found in the spreadsheet to extract the data for.

        version (str, required):
            The year associated with when the UN World Pop file was created.
            PLEASE NOTE: This year is a string.

        x_base_population (float, optional):
            The 'x_Base_Population' value (found in the config).  The population numbers
            from the CSV file are divided by this value before plotting.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If 'filename' is not an existing file or the file is missing the
            gender, age, or population column.
    """
    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")

    report_df = pd.read_csv(filename)

    # Checked in this fixed order so the first missing column is the one reported
    for required_column in (COL_NAME_GENDER, COL_NAME_AGE, COL_NAME_POP):
        if required_column not in report_df.columns:
            raise ValueError(f"'{required_column}' column does not exist in the file({filename}).")

    report_ages = report_df[COL_NAME_AGE].unique()

    # One row per year, one column per reported age, values summed over everything else
    pop_by_year_and_age = report_df.pivot_table(index=COL_NAME_YEAR,
                                                columns=[COL_NAME_AGE],
                                                values=COL_NAME_POP,
                                                aggfunc="sum")

    # Consecutive entries of the list form the [lower, upper) range of each bin
    bin_ranges = list(zip(age_bin_list, age_bin_list[1:]))

    actual_df = pd.DataFrame(index=pop_by_year_and_age.index)
    for lower_age, upper_age in bin_ranges:
        bin_label = f"Population: {lower_age}-{upper_age}"
        # Start the bin at zero and add every reported age that falls inside it
        pop_by_year_and_age[bin_label] = 0
        for reported_age in report_ages:
            if lower_age <= reported_age < upper_age:
                pop_by_year_and_age[bin_label] = pop_by_year_and_age[bin_label] + pop_by_year_and_age[(reported_age)]
        actual_df[bin_label] = pop_by_year_and_age[bin_label] / x_base_population

    # The report's years are truncated to whole, de-duplicated years for the UN lookup
    years = list(set(actual_df.index.values.astype(int).tolist()))

    unwp_df = unwp.extract_population_by_age(country=country,
                                             version=version,
                                             years=years,
                                             filename=unworld_pop_filename)
    unwp_df.index = unwp_df[COL_NAME_YEAR]

    expected_pop_df = pd.DataFrame(index=unwp_df.index)
    for lower_age, upper_age in bin_ranges:
        source_column = str(lower_age)
        expected_label = f"Expected Population: {lower_age}-{upper_age}"
        expected_pop_df[expected_label] = unwp_df[source_column]

    expected_pop_df.index = expected_pop_df.index.astype(float)

    xy_plot.xy_plot(img_dir=img_dir,
                    df=actual_df,
                    expected_df=expected_pop_df,
                    title_1="Population by Age",
                    title_2=f"UN World Population - {country} - {version}",
                    y_axis_name="Number of People",
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)

plot_population_by_gender(filename, img_dir=None)

For the given file, plot the population for each gender over time.

Args: filename (str, required): The name and path of the ReportHIVByAgeAndGender.csv file to extract the data from.

img_dir (str, optional):
    Directory to save the images. If None, the images will not be saved and a window will be opened.

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_population_by_gender(filename: str,
                              img_dir: str = None):
    """
    For the given file, plot the population of each gender over time.

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv file to extract the data from.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If 'filename' is not an existing file or the file has no gender column.
    """
    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")

    report_df = pd.read_csv(filename)

    if COL_NAME_GENDER not in report_df.columns:
        raise ValueError(f"'{COL_NAME_GENDER}' column does not exist in the file({filename}).")

    # One row per year, one column per gender value, population summed over everything else
    pop_by_year_and_gender = report_df.pivot_table(index=COL_NAME_YEAR,
                                                   columns=[COL_NAME_GENDER],
                                                   values=COL_NAME_POP,
                                                   aggfunc="sum")

    # Gender value 0 is plotted as men and gender value 1 as women
    gender_df = pd.DataFrame(index=pop_by_year_and_gender.index)
    gender_df["Number of Men"] = pop_by_year_and_gender[0]
    gender_df["Number of Women"] = pop_by_year_and_gender[1]

    xy_plot.xy_plot(img_dir=img_dir,
                    df=gender_df,
                    title_1="Population by Gender",
                    title_2=None,
                    y_axis_name="Number of People",
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)

plot_population_by_ip(dir_or_filename, exp_dir_or_filename=None, node_id=None, gender=None, age_bin_list=None, ip_key=None, ip_values=None, show_avg_per_run=False, show_fraction=False, expected_values=None, img_dir=None)

For the indicated files, create a plot showing who has what value of the given IP key over time.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
exp_dir_or_filename (str, optional)

The expected or alternate directory or filename containing the ReportHIVByAgeAndGender.csv files.

None
node_id int

The ID of the node for which the data is being filtered for.

None
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
age_bin_list list[float]

A list of ages, in years, where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
ip_key (str, required)

Extract the data from the files based on this IP stratification column. If the files have not been stratified by this IP, you will need to re-run the simulations with stratification turned on.

None
ip_values list[str]

By default, this plotting tool uses all of the values for the IP, however, this allows you to only include the ones you are interested in (i.e. a subset of the total possible values for the IP)

None
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
show_fraction bool

True indicates that the number of people with the given IP value will be divided by the sum of all the people with all of the selected IP values. For example, if the IP key were Risk and you selected to only plot LOW and MEDIUM, then the total number of people with LOW will be divided by the total number of people with either LOW or MEDIUM.

False
expected_values dict

If the user provides this dictionary, the constant expected value of each IP will be plotted. This should be a dictionary with an IP value as the key and a constant expected value. There should be an IP value for each IP value plotted.

None
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_population_by_ip(dir_or_filename: str,
                          exp_dir_or_filename: str = None,
                          node_id: int = None,
                          gender: str = None,
                          age_bin_list=None,
                          ip_key: str = None,
                          ip_values: list[str] = None,
                          show_avg_per_run: bool = False,
                          show_fraction: bool = False,
                          expected_values: dict = None,
                          img_dir: str = None):
    """
    For the indicated files, create a plot showing who has what value of the given IP key over time.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.  Cannot be combined with 'expected_values'.

        node_id (int, optional):
            The ID of the node that the data is filtered for.

        gender (str, optional):
            The gender (Male or Female) that the data is filtered for.

        age_bin_list (list[float], optional):
            A list of ages, in years, that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55)

        ip_key (str, required):
            Extract the data from the files based on this IP stratification column.
            If the files have not been stratified by this IP, you will need to re-run the
            simulations with stratification turned on.  Although the signature defaults
            this to None, a ValueError is raised when it is not given.

        ip_values (list[str], optional):
            By default, this plotting tool uses all of the values for the IP, however, this allows
            you to only include the ones you are interested in (i.e. a subset of the total possible
            values for the IP)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, average the values at each time step
            over the files.  Default is False.

        show_fraction (bool, optional):
            True indicates that the number of people with the given IP value will be divided by
            the sum of all the people with all of the selected IP values.  For example, if the
            IP key were Risk and you selected to only plot LOW and MEDIUM, then the total number
            of people with LOW will be divided by the total number of people with either LOW or MEDIUM.

        expected_values (dict, optional):
            If provided, a constant expected value is plotted for each IP value.  This is a
            dictionary with an IP value as the key and a constant expected value.  When
            'ip_values' is given, there must be an entry for each of those values; when
            'ip_values' is None, one expected line is plotted per key of this dictionary.
            Cannot be combined with 'exp_dir_or_filename'.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If both 'exp_dir_or_filename' and 'expected_values' are given, or if
            'ip_key' is None.
    """

    if (exp_dir_or_filename is not None) and (expected_values is not None):
        raise ValueError("You cannot specify both 'exp_dir_or_filename' and 'expected_values'. "
                         + "Please only specify one of these.")

    if ip_key is None:
        raise ValueError("IP Key must be specified.")
    ip_col_name = " IP_Key:" + ip_key

    def _load_plot_df(source_dir_or_filename):
        # Extract the stratified population data and shape it for plotting; used for
        # both the main data and the optional expected/alternate data.
        raw_df, prefixes = extract_population_data_by_stratification_for_dir(dir_or_filename=source_dir_or_filename,
                                                                             node_id=node_id,
                                                                             gender=gender,
                                                                             age_bin_list=age_bin_list,
                                                                             start_column_name=ip_col_name,
                                                                             strat_values=ip_values,
                                                                             show_avg_per_run=show_avg_per_run)
        return create_df_for_plot_by_stratification(combined_df=raw_df,
                                                    col_name_prefixs=prefixes,
                                                    age_bin_list=age_bin_list,
                                                    show_fraction=show_fraction)

    combined_df = _load_plot_df(dir_or_filename)

    expected_df = None
    if expected_values:
        # 'ip_values' defaults to None (meaning "all values"), which cannot be iterated.
        # In that case plot one constant line per key the caller supplied.
        expected_ip_values = ip_values if ip_values is not None else list(expected_values)
        expected_df = pd.DataFrame()
        expected_df.index = combined_df.index
        for value in expected_ip_values:
            # The scalar is broadcast over the index, giving a horizontal line
            expected_df[f"Expected: {value}"] = expected_values[value]
    elif exp_dir_or_filename is not None:
        expected_df = _load_plot_df(exp_dir_or_filename)

    base_title = "with " + ip_key + "=X"
    has_age_bins = age_bin_list is not None
    title = create_title(base_title=base_title,
                         node_id=node_id,
                         gender=gender,
                         show_avg_per_run=show_avg_per_run,
                         show_fraction=show_fraction,
                         show_fraction_of=False,
                         fraction_of_str=None,
                         has_age_bins=has_age_bins)
    y_axis_name = create_y_axis_name(base_title=" " + base_title,
                                     node_id=node_id,
                                     gender=gender,
                                     show_avg_per_run=show_avg_per_run,
                                     show_fraction=show_fraction,
                                     show_fraction_of=False,
                                     fraction_of_str=None,
                                     has_age_bins=has_age_bins)

    # Optionally show the data sources as a second title line (controlled by a module-level flag)
    title2 = None
    if TEST_include_dir_or_filename:
        if exp_dir_or_filename is not None:
            title2 = "color= " + dir_or_filename
            title2 = title2 + "\nblack = " + exp_dir_or_filename
        else:
            title2 = dir_or_filename

    xy_plot.xy_plot(img_dir=img_dir,
                    df=combined_df,
                    expected_df=expected_df,
                    title_1=title,
                    title_2=title2,
                    y_axis_name=y_axis_name,
                    show_legend=show_avg_per_run,
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)

plot_population_for_dir(dir_or_filename, unworld_pop_filename, country, version, x_base_population=1.0, show_avg_per_run=False, gender=None, age_bin=None, img_dir=None)

Plot the population for the given age bin against the data in the UN World Population spreadsheet.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
unworld_pop_filename (str, required)

The name and path to a UN World Pop Excel spreadsheet where the 'country' parameter specifies a country name found in the spreadsheet and the 'version' specifies the year of the data. These values are needed to know how to read the data in the spreadsheet.

required
country (str, required)

The name of the country found in the spreadsheet to extract the data for.

required
version (str, required)

The year associated with when the UN World Pop file was created. PLEASE NOTE: This year is a string.

required
x_base_population float

The 'x_Base_Population' value (found in the config) is used to divide by the population numbers in the CSV file so you get numbers that match the true population.

1.0
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
age_bin float

If provided, the data for this specific age stratification will be plotted. Both the data in the report file and the UN World Pop file must have this stratification. If you do not provide a value, then the population is not broken up by age (i.e. total population).

None
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_population_for_dir(dir_or_filename: str,
                            unworld_pop_filename: str,
                            country: str,
                            version: str,
                            x_base_population: float = 1.0,
                            show_avg_per_run: bool = False,
                            gender: str = None,
                            age_bin: float = None,
                            img_dir: str = None):
    """
    Plot the simulated population against the data in the UN World Population spreadsheet.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        unworld_pop_filename (str, required):
            The name and path to a UN World Pop Excel spreadsheet where the 'country' parameter
            specifies a country name found in the spreadsheet and the 'version' specifies the
            year of the data.  If this value is empty/None, no expected data is plotted.

        country (str, required):
            The name of the country found in the spreadsheet to extract the data for.

        version (str, required):
            The year associated with when the UN World Pop file was created.
            PLEASE NOTE: This year is a string.  Version "2015" uses a different
            date column name than the other versions.

        x_base_population (float, optional):
            The 'x_Base_Population' value (found in the config).  The population numbers
            in the CSV file are divided by this value so that they match the true population.
            Must be greater than zero.

        show_avg_per_run (bool, optional):
            If True, a single line is plotted that is the average population at each
            time step over all of the files.  If False, one line is plotted per file.
            Default is False.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin (float, optional):
            If provided, the data for this specific age stratification will be plotted.
            Both the data in the report file and the UN World Pop file must have this
            stratification.  If you do not provide a value, then the population is not
            broken up by age (i.e. total population).  The title labels the bin as
            'age_bin-(age_bin + 5)', or 'age_bin+' when the value is 80.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If 'x_base_population' is not greater than zero.
    """
    if x_base_population <= 0.0:
        raise ValueError("'x_base_population' must be a value greater than zero.")

    # Get the file or files in the directory
    dir_filenames = helpers.get_filenames(dir_or_filename=dir_or_filename,
                                          file_prefix="ReportHIVByAgeAndGender",
                                          file_extension=".csv")

    # ---------------------------------------------------------------
    # Extract the data from each file and combine into one dataframe
    # ---------------------------------------------------------------
    combined_df = pd.DataFrame()
    for fn in dir_filenames:
        df = extract_population_data(fn, gender=gender, age_bin=age_bin)
        if len(combined_df.columns) == 0:
            # First file: adopt its index and, when averaging, start the running sum.
            combined_df.index = df.index
            if show_avg_per_run:
                combined_df[COL_NAME_POP] = 0
        scaled_population = df[COL_NAME_POP] / x_base_population
        if show_avg_per_run:
            combined_df[COL_NAME_POP] = combined_df[COL_NAME_POP] + scaled_population
        else:
            combined_df[fn] = scaled_population

    if show_avg_per_run:
        # Turn the running sum into the per-run average.
        for column_name in combined_df.columns:
            combined_df[column_name] = combined_df[column_name] / len(dir_filenames)

    # ----------------------------------------------------------------
    # Extract the expected population data from the UN World Pop file
    # and put into dataframe.  Get the data for the years that you have
    # in the CSV file.
    # ----------------------------------------------------------------
    # De-duplicate and sort so the helper always receives the years in a stable order.
    years = sorted(set(combined_df.index.values.astype(int).tolist()))

    date_column = COL_NAME_YEAR
    if version == "2015":
        date_column = "Reference date (as of 1 July)"

    unwp_df2 = None
    title2 = dir_or_filename
    if unworld_pop_filename:
        title2 = str(unworld_pop_filename)
        unwp_df = unwp.extract_population_by_age(country=country,
                                                 version=version,
                                                 years=years,
                                                 filename=unworld_pop_filename)
        unwp_df.index = unwp_df[date_column].astype(float)
        del unwp_df[date_column]
        # The remaining columns are converted to integer labels so 'age_bin' can select one.
        unwp_df = unwp_df.rename(columns={name: int(name) for name in unwp_df.columns})

        unwp_df2 = pd.DataFrame()
        unwp_df2.index = unwp_df.index.astype(float)
        if age_bin is not None:
            unwp_df2["Expected Population"] = unwp_df[age_bin]
        else:
            unwp_df2["Expected Population"] = unwp_df.sum(axis=1)

    # --------------------------
    # create title and plot data
    # --------------------------
    # Every part is joined with " - " so that the averaged title reads
    # "Average Per Run - Population" rather than "Average Per RunPopulation".
    title_parts = []
    if show_avg_per_run:
        title_parts.append("Average Per Run")
    title_parts.append("Population")
    if gender is not None:
        title_parts.append(gender)
    if age_bin is not None:
        if age_bin == 80:
            title_parts.append(str(age_bin) + "+")
        else:
            title_parts.append(str(age_bin) + "-" + str(age_bin + 5))
    title = " - ".join(title_parts)

    if not TEST_include_dir_or_filename:
        title2 = None

    xy_plot.xy_plot(img_dir=img_dir,
                    df=combined_df,
                    expected_df=unwp_df2,
                    title_1=title,
                    title_2=title2,
                    y_axis_name="Number of People",
                    fraction_of_total=False,
                    show_legend=show_avg_per_run,
                    show_markers=show_avg_per_run,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)

plot_prevalence_for_dir(dir_or_filename, exp_dir_or_filename=None, node_id=None, gender=None, age_bin_list=None, show_avg_per_run=False, show_fraction=False, img_dir=None)

Create a plot showing who is infected with HIV.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
exp_dir_or_filename str

The expected or alternate directory or filename containing the ReportHIVByAgeAndGender.csv files.

None
node_id int

The ID of the node for which the data is being filtered for.

None
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
show_fraction bool

True indicates that the number of infected people should be divided by the total number of people in that stratification.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_prevalence_for_dir(dir_or_filename: str,
                            exp_dir_or_filename: str = None,
                            node_id: int = None,
                            gender: str = None,
                            age_bin_list: list[float] = None,
                            show_avg_per_run: bool = False,
                            show_fraction: bool = False,
                            img_dir: str = None):
    """
    Plot the people infected with HIV, optionally broken up by age bin.

    This is a thin wrapper that forwards to 'base_plot_by_age' using the
    "Infected" title and the infected column of the report.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example, if you
            enter [10, 25, 30, 55], there will be three age bins with the following
            ranges: [10-25), [25-30), [30-55)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, this will calculate the average value
            at a given time step between the files.  Default is False.

        show_fraction (bool, optional):
            True indicates that the number of infected people should be divided by the total
            number of people in that stratification.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None
    """
    plot_settings = {
        "base_title": "Infected",
        "main_column_name": COL_NAME_INFECTED,
        "dir_or_filename": dir_or_filename,
        "exp_dir_or_filename": exp_dir_or_filename,
        "node_id": node_id,
        "age_bins": age_bin_list,
        "gender": gender,
        "show_avg_per_run": show_avg_per_run,
        "show_fraction": show_fraction,
        # The "fraction of" options are switched off / left blank for this plot.
        "fraction_of": False,
        "fraction_of_column_name": "",
        "fraction_of_str": "",
        "img_dir": img_dir,
    }
    base_plot_by_age(**plot_settings)

plot_risk(dir_or_filename, starting_expected_values=None, expected_value_for_high_per_node=None, gender=None, age_bin_list=None, show_avg_per_run=False, show_fraction=False, img_dir=None)

Create one plot for each node in 'expected_value_for_high_per_node' showing the risk values for the population versus what is expected.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
starting_expected_values dict

The starting three values of how risk is distributed to the population. These should be the same values that you have in the demographics. The order is LOW, MEDIUM, and HIGH. Typically, one might have 0.85 for LOW, 0.15 for MEDIUM, and 0.0 for HIGH because people get set to HIGH in the campaign's CSW logic.

None
expected_value_for_high_per_node list[float]

A list of expected fraction of the population to have Risk = HIGH for a given node. The node ID of each value is expected to be the index of the position plus 1. The starting values for LOW and MEDIUM are adjusted for this HIGH value.

None
gender str

The string (Male or Female) for the gender that data is being filtered for.

None
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
show_fraction bool

True indicates that for each stratification the number of people with a given risk value is divided by the total number of people in that stratification.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_risk(dir_or_filename: str,
              starting_expected_values: dict = None,
              expected_value_for_high_per_node: list[float] = None,
              gender: str = None,
              age_bin_list: list[float] = None,
              show_avg_per_run: bool = False,
              show_fraction: bool = False,
              img_dir: str = None):
    """
    Plot the Risk values (LOW, MEDIUM, HIGH) of the population.

    When expected values are supplied, one plot is created per entry in
    'expected_value_for_high_per_node' and each plot includes the expected values
    for that node.  When they are not supplied, a single plot without expected
    values and without a node filter is created.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        starting_expected_values (dict, optional):
            The starting values of how risk is distributed to the population, keyed by
            "LOW", "MEDIUM", and "HIGH".  These should be the same values that you have
            in the demographics.  The "HIGH" value must be 0.0.  Typically, one might
            have 0.85 for LOW, 0.15 for MEDIUM, and 0.0 for HIGH because people get set
            to HIGH in the campaign's CSW logic.

        expected_value_for_high_per_node (list[float], optional):
            A list of expected fraction of the population to have Risk = HIGH for a given node.
            The node ID of each value is the index of the position plus 1.
            The starting values for LOW and MEDIUM are scaled by (1 - HIGH) for each node.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years where the population with risk value will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, this will calculate the average number of
            people with the given risk type at a given time step between the files.
            Default is False.

        show_fraction (bool, optional):
            True indicates that for each stratification the number of people with a given
            risk value is divided by the total number of people in that stratification.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If only one of 'starting_expected_values' and
            'expected_value_for_high_per_node' is provided, or if the starting
            "HIGH" value is not zero.
    """
    have_starting = starting_expected_values is not None
    have_high_list = expected_value_for_high_per_node is not None

    # The two "expected" arguments only make sense as a pair.
    if have_starting != have_high_list:
        raise ValueError("'starting_expected_values' and 'expected_value_for_high_per_node'"
                         " need to be either both None or both defined.")

    if have_starting and (starting_expected_values["HIGH"] != 0.0):
        raise ValueError("Expected 'starting_expected_values['HIGH'] to be zero.")

    # Arguments that are identical for every plot produced below.
    shared_kwargs = dict(dir_or_filename=dir_or_filename,
                         gender=gender,
                         age_bin_list=age_bin_list,
                         ip_key="Risk",
                         ip_values=["LOW", "MEDIUM", "HIGH"],
                         show_avg_per_run=show_avg_per_run,
                         show_fraction=show_fraction,
                         img_dir=img_dir)

    if not have_starting:
        # No expectations given: one plot, not filtered by node.
        plot_population_by_ip(node_id=None, expected_values=None, **shared_kwargs)
        return

    # Node IDs are the 1-based position in the list.
    for node_id, high_value in enumerate(expected_value_for_high_per_node, start=1):
        expected_values = {
            "LOW": starting_expected_values["LOW"] * (1.0 - high_value),
            "MEDIUM": starting_expected_values["MEDIUM"] * (1.0 - high_value),
            "HIGH": high_value,
        }
        plot_population_by_ip(node_id=node_id, expected_values=expected_values, **shared_kwargs)

plot_risk_zambia(dir_or_filename, age_bin_list=None, show_avg_per_run=False, show_fraction=False, show_expected=False, img_dir=None)

Create multiple risk value plots where each plot is for a specific node and gender. The plot can show the count or fraction of the group that has one of the three risk values. The plot can also show the expected values for specific node and gender for Zambia.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
show_fraction bool

True indicates that the data is not true counts but a fraction (i.e. a count divided by another counter)

False
show_expected bool

True indicates that you want to see the expected fractions of the population that have the different risk values PER NODE. False will be data for all nodes.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_risk_zambia(dir_or_filename: str,
                     age_bin_list: list[float] = None,
                     show_avg_per_run: bool = False,
                     show_fraction: bool = False,
                     show_expected: bool = False,
                     img_dir: str = None):
    """
    Create the risk value plots for Zambia, first for females and then for males.

    Each gender is handed to 'plot_risk'.  When 'show_expected' is True, the
    Zambia-specific expected values are passed along so that there is one plot per
    node and gender; otherwise no expected values are passed and the data is for
    all nodes.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        age_bin_list (list[float], optional):
            A list of ages in years where the population with risk value will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, this will calculate the average number of
            people with the given risk type at a given time step between the files.
            Default is False.

        show_fraction (bool, optional):
            True indicates that the data is not true counts but a fraction
            (i.e. a count divided by another counter)

        show_expected (bool, optional):
            True indicates that you want to see the expected fractions of the population that
            have the different risk values PER NODE.  False will be data for all nodes.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None
    """
    # Expected fraction with Risk = HIGH per node, keyed by gender.  Only four nodes
    # are listed; the ten-node variants are kept here for reference:
    #   Female: [0.125, 0.125, 0.125, 0.0500, 0.0500, 0.0500, 0.125, 0.0500, 0.125, 0.0500]
    #   Male:   [0.195, 0.195, 0.195, 0.0781, 0.0781, 0.0781, 0.195, 0.0781, 0.195, 0.0781]
    high_fraction_by_gender = {
        "Female": [0.125, 0.125, 0.125, 0.0500],
        "Male": [0.195, 0.195, 0.195, 0.0781],
    }

    if show_expected:
        baseline_risk = {"LOW": 0.85, "MEDIUM": 0.15, "HIGH": 0.0}
    else:
        baseline_risk = None

    for gender, high_per_node in high_fraction_by_gender.items():
        # Both "expected" arguments must be given together or not at all.
        plot_risk(dir_or_filename=dir_or_filename,
                  starting_expected_values=baseline_risk,
                  expected_value_for_high_per_node=high_per_node if show_expected else None,
                  gender=gender,
                  age_bin_list=age_bin_list,
                  show_avg_per_run=show_avg_per_run,
                  show_fraction=show_fraction,
                  img_dir=img_dir)

plot_vmmc_by_age(dir_or_filename, exp_dir_or_filename=None, node_id=None, age_bin_list=None, show_avg_per_run=False, img_dir=None)

Create a plot showing information about the men who are circumcised.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
exp_dir_or_filename str

The expected or alternate directory or filename containing the ReportHIVByAgeAndGender.csv files.

None
node_id int

The ID of the node for which the data is being filtered for.

None
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_vmmc_by_age(dir_or_filename: str,
                     exp_dir_or_filename: str = None,
                     node_id: int = None,
                     age_bin_list: list[float] = None,
                     show_avg_per_run: bool = False,
                     img_dir: str = None):
    """
    Plot information about the men who are circumcised, optionally by age bin.

    This is a thin wrapper that forwards to 'base_plot_by_age' with the gender
    fixed to "Male", the HIV-negative filter turned on, and the data stratified
    on the circumcision column.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example, if you
            enter [10, 25, 30, 55], there will be three age bins with the following
            ranges: [10-25), [25-30), [30-55)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, this will calculate the average value
            at a given time step between the files.  Default is False.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None
    """
    plot_settings = dict(
        base_title="VMMC",
        main_column_name=COL_NAME_POP,
        dir_or_filename=dir_or_filename,
        exp_dir_or_filename=exp_dir_or_filename,
        node_id=node_id,
        gender="Male",
        age_bins=age_bin_list,
        show_avg_per_run=show_avg_per_run,
        # Counts only: no fraction options are used for this plot.
        show_fraction=False,
        fraction_of=False,
        fraction_of_column_name="",
        fraction_of_str=None,
        filter_by_hiv_negative=True,
        # Stratify on the circumcision column.
        other_strat_column_name=COL_NAME_IS_CIRC,
        other_strat_value_a=1,  # Only show circumcised
        other_strat_value_b=0,  # Not circumcised
        img_dir=img_dir,
    )
    base_plot_by_age(**plot_settings)

plot_vmmc_for_dir(dir_or_filename, node_id=None, age_bin_list=None, show_expected=False, show_avg_per_run=False, img_dir=None)

Create a plot showing what fraction of men are circumcised over time versus the expected number of circumcised men.

Parameters:

Name Type Description Default
dir_or_filename (str, required)

The directory or filename containing the ReportHIVByAgeAndGender.csv files.

required
node_id int

The ID of the node for which the data is being filtered for.

None
age_bin_list list[float]

A list of ages in years where the population with risk value will be counted for each bin. For example, if you enter [10, 25, 30, 55], there will be three age bins with the following ranges: [10-25), [25-30), [30-55)

None
show_expected bool

If true, plot the expected fraction of circumcisions.

False
show_avg_per_run bool

If 'dir_or_filename' is a directory, this will calculate the average number of people with the given risk type at a given time step between the files. Default is False.

False
img_dir str

Directory to save the images. If None, the images will not be saved and a window will be opened.

None

Returns:

Source code in emodpy_hiv/plotting/plot_hiv_by_age_and_gender.py
def plot_vmmc_for_dir(dir_or_filename: str,
                      node_id: int = None,
                      age_bin_list: list[float] = None,
                      show_expected: bool = False,
                      show_avg_per_run: bool = False,
                      img_dir: str = None):
    """
    Create a plot showing what fraction of men are circumcised over time versus the
    expected fraction of circumcised men.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.  Required when
            'show_expected' is True, in which case it must be between 1 and the number of
            nodes that have an expected traditional coverage value (currently 10).

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example, if you
            enter [10, 25, 30, 55], there will be three age bins with the following
            ranges: [10-25), [25-30), [30-55)

        show_expected (bool, optional):
            If true, plot the expected fraction of circumcisions.

        show_avg_per_run (bool, optional):
            If True, the fraction of circumcised men is averaged at each time step over
            all of the files.  If False, each file gets its own lines, named
            '<filename>-<column>'.  Default is False.

        img_dir (str, optional):
            Directory to save the images. If None, the images will not be saved and a window will be opened.

    Returns:
        None

    Raises:
        ValueError: If 'show_expected' is True and 'node_id' is None or is not a
            node with an expected traditional coverage value.
    """
    # Expected traditional coverage per node; position i holds the value for node ID i + 1.
    # NOTE(review): presumably country specific (Zambia?) - confirm.
    traditional_coverage_per_node = [0.054978651, 0.139462861, 0.028676043, 0.091349358, 0.123187070,
                                     0.039308099, 0.727917322, 0.041105263, 0.044388102, 0.398239794]
    # The "Expected Plus Medical" line is zero before rtec_x1, ramps linearly from
    # rtec_y1 to rtec_y2 between rtec_x1 and rtec_x2, and stays at rtec_y2 afterwards.
    rtec_x1 = 2016
    rtec_x2 = 2021
    rtec_y1 = 0.54
    rtec_y2 = 0.9

    if node_id is None and show_expected:
        raise ValueError("You need to specify 'node_id' if you want to compare actual data against the expected values.")

    # Validate before any file is read.  Without this check a node_id of zero or less
    # silently wraps around to a value from the end of the list.
    if show_expected and not (1 <= node_id <= len(traditional_coverage_per_node)):
        raise ValueError(f"'node_id' must be between 1 and {len(traditional_coverage_per_node)}"
                         f" when 'show_expected' is True, but got {node_id}.")

    dir_filenames = helpers.get_filenames(dir_or_filename=dir_or_filename,
                                          file_prefix="ReportHIVByAgeAndGender",
                                          file_extension=".csv")

    combined_df = pd.DataFrame()
    for fn in dir_filenames:
        # The second return value of the extraction helper is not needed here.
        df_circ, _ = extract_population_data_multiple_ages(fn,
                                                           node_id=node_id,
                                                           gender="Male",
                                                           age_bin_list=age_bin_list,
                                                           other_strat_column_name=COL_NAME_IS_CIRC,
                                                           other_strat_value=1)
        df_un_circ, _ = extract_population_data_multiple_ages(fn,
                                                              node_id=node_id,
                                                              gender="Male",
                                                              age_bin_list=age_bin_list,
                                                              other_strat_column_name=COL_NAME_IS_CIRC,
                                                              other_strat_value=0)
        if len(combined_df.columns) == 0:
            # First file: adopt its index and create the columns (running sums start at zero).
            combined_df.index = df_circ.index
            for column_name in df_circ.columns:
                name = column_name
                if not show_avg_per_run:
                    name = fn + "-" + name
                combined_df[name] = 0
        for column_name in df_circ.columns:
            fraction_circumcised = df_circ[column_name] / (df_circ[column_name] + df_un_circ[column_name])
            if show_avg_per_run:
                combined_df[column_name] = combined_df[column_name] + fraction_circumcised
            else:
                combined_df[fn + "-" + column_name] = fraction_circumcised

    if show_avg_per_run:
        # Turn the running sums into the per-run average.
        for column_name in combined_df.columns:
            combined_df[column_name] = combined_df[column_name] / len(dir_filenames)

    expected_df = None
    if show_expected:
        expected_df = pd.DataFrame()
        expected_df.index = combined_df.index
        expected_df["Expected Traditional"] = traditional_coverage_per_node[node_id - 1]
        years = combined_df.index.values.astype(float).tolist()
        rtec = []
        for year in years:
            if year < rtec_x1:
                rtec.append(0)
            elif year > rtec_x2:
                rtec.append(rtec_y2)
            else:
                rtec.append(rtec_y1 + (year - rtec_x1) * (rtec_y2 - rtec_y1) / (rtec_x2 - rtec_x1))
        expected_df["Expected Plus Medical"] = rtec

    title = "Circumcised Men"
    if show_avg_per_run:
        title = title + " - Average"
    if node_id is not None:
        title = title + " - Node " + str(node_id)

    title2 = None
    if TEST_include_dir_or_filename:
        title2 = dir_or_filename

    xy_plot.xy_plot(img_dir=img_dir,
                    df=combined_df,
                    expected_df=expected_df,
                    title_1=title,
                    title_2=title2,
                    y_axis_name="Fraction of Circumcised Men",
                    fraction_of_total=False,
                    show_legend=show_avg_per_run,
                    show_markers=show_avg_per_run,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)