API

`build_map(gdf_stats, title, bins_type, n_bins=5, clip_quantile=0.01, map_tiles_provider='OpenStreetMap', optimise_choropleth_size=True, coordinates_start=(47.39, 8.53), zoom_start=15, max_n_hectares_to_display=None, plot_boundaries=False, fill_opacity=0.6)` ¶

Generate a folium map with the some statistics per hectare added as a choropleth alayer on top.

See FAQ for detailed explanation and typical use-cases of more complex attributes.

Parameters:

Name	Type	Description	Default
`gdf_stats`	`GeoDataFrame`	input table with the hectare polygons and statistics values.	required
`title`	`str`	the name to be displayed as the color-map title as well as the entry in the control panel.	required
`bins_type`	`str`	The type of binning for the choropleth. Accepted values are "equidistant" and "quantiles" that would define bins either on the linear or on the quantile scale.	required
`n_bins`	`int`	number of bins to use. Defaults to 5.	`5`
`clip_quantile`	`Optional[float]`	clip/winzorise statistics values to specific quntiles. The effect is double-sided. Set to None to avoid any clipping. Defaults to 0.01.	`0.01`
`map_tiles_provider`	`str`	Map tileset to use. The value is passed over to folium.Map. Defaults to "OpenStreetMap".	`'OpenStreetMap'`
`optimise_choropleth_size`	`bool`	Set to True to reduce precision of polygon coordinates to a pre-defined optimised value that will keep hectare boundary precision. This allows to reduce the size of the map HTML dump on disk. Defaults to True.	`True`
`coordinates_start`	`Tuple[float, float]`	starting coordinated in longiotude and latitude. Defaults to (47.39, 8.53) [Zürich].	`(47.39, 8.53)`
`zoom_start`	`int`	starting zoom. Defaults to 15.	`15`
`max_n_hectares_to_display`	`Optional[int]`	the top-N hectares to display. Defaults to None.	`None`
`plot_boundaries`	`bool`	set to True to display boundaries between choroplet elements. This is useful if you visualise not hectares but some administrative entities. Defaults to False.	`False`
`fill_opacity`	`float`	opacity of the choropleth. Defaults to 0.6.	`0.6`

Raises:

Type	Description
`ValueError`	unsupported `bins_type` provided.

Returns:

Type	Description
`Map`	folium.Map: the map with cholopleth layer

Source code in src/map_with_stats/map.py

def build_map(
    gdf_stats: gpd.GeoDataFrame,
    title: str,
    bins_type: str,
    n_bins: int = 5,
    clip_quantile: Optional[float] = 0.01,
    map_tiles_provider: str = "OpenStreetMap",
    optimise_choropleth_size: bool = True,
    coordinates_start: Tuple[float, float] = (47.39, 8.53),
    zoom_start: int = 15,
    max_n_hectares_to_display: Optional[int] = None,
    plot_boundaries: bool = False,
    fill_opacity: float = 0.6,
) -> folium.Map:
    """Generate a folium map with the some statistics per hectare added as a choropleth alayer on top.

    See [FAQ](faq) for detailed explanation and typical use-cases of more complex attributes.

    Args:
        gdf_stats (gpd.GeoDataFrame): input table with the hectare polygons and statistics values.
        title (str): the name to be displayed as the color-map title as well as
            the entry in the control panel.
        bins_type (str): The type of binning for the choropleth.
            Accepted values are _"equidistant"_ and _"quantiles"_
            that would define bins either on the linear or on the quantile scale.
        n_bins (int, optional): number of bins to use. Defaults to 5.
        clip_quantile (Optional[float], optional): clip/winzorise statistics values to specific quntiles.
            The effect is double-sided.
            Set to _None_ to avoid any clipping.
            Defaults to 0.01.
        map_tiles_provider (str, optional): Map tileset to use.
            The value is passed over to [folium.Map](https://python-visualization.github.io/folium/modules.html#folium.folium.Map).
            Defaults to "OpenStreetMap".
        optimise_choropleth_size (bool, optional): Set to True to reduce precision of polygon
            coordinates to a pre-defined optimised value that will keep hectare boundary precision.
            This allows to reduce the size of the map HTML dump on disk. Defaults to True.
        coordinates_start (Tuple[float, float], optional): starting coordinated in longiotude and latitude.
            Defaults to (47.39, 8.53) [Zürich].
        zoom_start (int, optional): starting zoom. Defaults to 15.
        max_n_hectares_to_display (Optional[int], optional): the top-N hectares to display.
            Defaults to None.
        plot_boundaries (bool, optional): set to True to display boundaries between choroplet elements.
            This is useful if you visualise not hectares but some administrative entities.
            Defaults to False.
        fill_opacity (float, optional): opacity of the choropleth. Defaults to 0.6.

    Raises:
        ValueError: unsupported `bins_type` provided.

    Returns:
        folium.Map: the map with cholopleth layer
    """
    _check_cols_in_df(gdf_stats, ["geometry", "value"])

    # make a copy to avoid modifying input data in-place
    gdf = gdf_stats.copy(deep=True)

    ch_map = folium.Map(
        location=coordinates_start, zoom_start=zoom_start, tiles=map_tiles_provider
    )

    # optimise the number of hectares to be displayed as choropleth
    # display becomes very slow and sometimes stops work for a very large number fo elements
    if max_n_hectares_to_display:
        cumsum = gdf_stats["value"].value_counts().sort_index(ascending=False).cumsum()
        min_value = cumsum[cumsum <= max_n_hectares_to_display].index[-1]
        gdf = gdf[gdf["value"] >= min_value]
    # coordinates of the vetrices of hectares are stores with double precision,
    # that is more  than neccessary and will lead to wasted space, when stored in html
    # the number of digits after comma has been optimised to lead differences less than 1m in LV03 CRS
    if optimise_choropleth_size:
        gdf["geometry"] = gdf["geometry"].apply(
            _round_coordinates, n_digits_after_comma=6
        )
    # get values to be used for colors in the choropleth
    cliped_values = gdf["value"]
    if clip_quantile is not None:
        clip_min, clip_max = cliped_values.quantile((clip_quantile, 1 - clip_quantile))
        cliped_values = cliped_values.clip(clip_min, clip_max)
    # get either number of equidistant bins or bins defined by quantiles
    bins: Union[int, List[float]]
    if bins_type == "quantiles":
        quantiles = [i / (n_bins) for i in range(n_bins + 1)]
        bins = list(cliped_values.quantile(quantiles))
    elif bins_type == "equidistant":
        bins = n_bins
    else:
        raise ValueError(f"Unexpected type of bins: {bins_type}")

    if plot_boundaries:
        # thin gray line
        line_weight = 0.5
        line_color = "gray"
    else:
        # no line, for example around hectares
        line_weight = 0
        line_color = "black"
    # coloured choropleth with the colour reflecting the statistics value
    # for example squares in the case of hectares
    choropleth = folium.Choropleth(
        geo_data=gdf,
        data=cliped_values,
        key_on="feature.id",  # index of the geodataframe is transformed into `id` field
        fill_color="YlOrBr",
        fill_opacity=fill_opacity,
        bins=bins,
        line_weight=line_weight,
        line_color=line_color,
        nan_fill_opacity=1,  # fully transparent hectares if the valueis mising
        legend_name=title,  # title under the color scale
        name=title,  # name of thew layer, e.g. in the layer control
    ).add_to(ch_map)

    # optional X and Y coordinates that would appear for hectares, but not for administrative regions
    cols_xy = [c for c in ["X", "Y"] if c in gdf]
    labels_xy = ["LV03 X:", "LV03 Y:"] if cols_xy else []
    # add tooltip to appear, when pointing at a hectar
    tooltip = folium.GeoJsonTooltip(
        # column names with values to be displayed
        fields=cols_xy + ["value"],
        # text to be shown explaining each value
        aliases=labels_xy + [f"{title}:"],
        localize=True,
        max_width=800,
    )
    tooltip.add_to(choropleth.geojson)

    folium.LayerControl().add_to(ch_map)
    return ch_map

`create_geo_df_with_hectar_polygons(df, col_value, crs_out='EPSG:4326', crs_in='EPSG:21781', grid_size=100)` ¶

Generate hectare polygons.

Given a pandas dataframe with "X", "Y" coordinates of the bottom-left (south-west) corner of the hectares and a column col_value with values, create a geopandas geodataframe with hectare polygons and the selected column with values.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	input data. The code expects to find the following columns: "X", "Y", `col_value`.	required
`col_value`	`str`	column name with some values	required
`crs_out`	`str`	coordinate system to which output polygons are transformed. Defaults to "EPSG:4326".	`'EPSG:4326'`
`crs_in`	`str`	coordinate system of the input data. Defaults to "EPSG:21781" (=LV03).	`'EPSG:21781'`
`grid_size`	`int`	grid size in meters. Defaults to 100 ()i.e. hectar.	`100`

Returns:

Type	Description
`GeoDataFrame`	gpd.GeoDataFrame: a table with hectare polygons and the values.

Source code in src/map_with_stats/data.py

def create_geo_df_with_hectar_polygons(
    df: pd.DataFrame,
    col_value: str,
    crs_out: str = "EPSG:4326",
    crs_in: str = "EPSG:21781",
    grid_size: int = 100,
) -> gpd.GeoDataFrame:
    """Generate hectare polygons.

    Given a pandas dataframe with _"X", "Y"_ coordinates of the bottom-left (south-west) corner
    of the hectares and a column `col_value` with values, create a geopandas geodataframe with
    hectare polygons and the selected column with values.

    Args:
        df (pd.DataFrame): input data. The code expects to find the following columns: _"X", "Y", `col_value`_.
        col_value (str): column name with some values
        crs_out (str, optional): coordinate system to which output polygons are transformed.
            Defaults to "EPSG:4326".
        crs_in (str, optional): coordinate system of the input data. Defaults to "EPSG:21781" (=LV03).
        grid_size (int, optional): grid size in meters. Defaults to 100 ()i.e. hectar.

    Returns:
        gpd.GeoDataFrame: a table with hectare polygons and the values.
    """
    _check_cols_in_df(df, ["X", "Y", col_value])
    polygons = []
    for row in df.itertuples():
        x, y = row.X, row.Y
        polygons.append(
            shapely.Polygon(
                [
                    (x, y),
                    (x + grid_size, y),
                    (x + grid_size, y + grid_size),
                    (x, y + grid_size),
                ]
            )
        )
    gdf_stats = gpd.GeoDataFrame(
        {
            "geometry": polygons,
            "value": df[col_value],
            "hectare_id": df["X"] // grid_size * 10_000 + df["Y"] // grid_size,
            "X": df["X"],
            "Y": df["Y"],
        },
        index=df.index,
        crs=crs_in,
    )
    # transform into the desired coordinate system
    gdf_stats = gdf_stats.to_crs(crs_out)
    return gdf_stats

`filter_xy(df, ch_x_min, ch_y_min, ch_x_max, ch_y_max)` ¶

Filter input data to be within range of X and Y coordinates.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	inout data. The code expects to find the following columns: "X", "Y".	required
`ch_x_min`	`float`	minimum value of X	required
`ch_y_min`	`float`	minimum value of Y	required
`ch_x_max`	`float`	maximum value of X	required
`ch_y_max`	`float`	maximum value of Y	required

Returns:

Type	Description
`DataFrame`	pd.DataFrame: input data with the restricted "X", "Y" values

Source code in src/map_with_stats/data.py

def filter_xy(
    df: pd.DataFrame, ch_x_min: float, ch_y_min: float, ch_x_max: float, ch_y_max: float
) -> pd.DataFrame:
    """Filter input data to be within range of X and Y coordinates.

    Args:
        df (pd.DataFrame): inout data. The code expects to find the following columns: _"X", "Y"_.
        ch_x_min (float): minimum value of X
        ch_y_min (float): minimum value of Y
        ch_x_max (float): maximum value of X
        ch_y_max (float): maximum value of Y

    Returns:
        pd.DataFrame: input data with the restricted _"X", "Y"_ values
    """
    _check_cols_in_df(df, ["X", "Y"])
    mask_x = df["X"].between(ch_x_min, ch_x_max)
    mask_y = df["Y"].between(ch_y_min, ch_y_max)
    mask_in_range = mask_x & mask_y
    return df[mask_in_range]

`hectare2xy(df, col_hectare='hectare_id')` ¶

Convert hectare ID into (x,y) coorsinates in the LV03 coordinate system.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	input table.	required
`col_hectare`	`str`	name of the column with hectare IDs. The common convention is that coordinates x=ABCDEF, y=ZYXWVT are represented by a hectare ID ABCDZYXW. Defaults to "hectare_id".	`'hectare_id'`

Returns:

Type	Description
`DataFrame`	pd.DataFrame: input table with "X", "Y" columns added (or overwritten, if they existed in the input)

Source code in src/map_with_stats/utils.py

def hectare2xy(df: pd.DataFrame, col_hectare: str = "hectare_id") -> pd.DataFrame:
    """Convert hectare ID into (x,y) coorsinates in the LV03 coordinate system.

    Args:
        df (pd.DataFrame): input table.
        col_hectare (str, optional): name of the column with hectare IDs.
            The common convention is that coordinates _x=ABCDEF_, _y=ZYXWVT_
            are represented by a hectare ID _ABCDZYXW_.
            Defaults to "hectare_id".

    Returns:
        pd.DataFrame: input table with _"X", "Y"_ columns added (or overwritten,
            if they existed in the input)
    """
    _check_cols_in_df(df, [col_hectare])
    df_out = df.copy(deep=True)
    df_out["X"] = df_out[col_hectare] // 10_000 * 100
    df_out["Y"] = df_out[col_hectare] % 10_000 * 100
    return df_out

API

build_map(gdf_stats, title, bins_type, n_bins=5, clip_quantile=0.01, map_tiles_provider='OpenStreetMap', optimise_choropleth_size=True, coordinates_start=(47.39, 8.53), zoom_start=15, max_n_hectares_to_display=None, plot_boundaries=False, fill_opacity=0.6) ¶

create_geo_df_with_hectar_polygons(df, col_value, crs_out='EPSG:4326', crs_in='EPSG:21781', grid_size=100) ¶

filter_xy(df, ch_x_min, ch_y_min, ch_x_max, ch_y_max) ¶

hectare2xy(df, col_hectare='hectare_id') ¶

`build_map(gdf_stats, title, bins_type, n_bins=5, clip_quantile=0.01, map_tiles_provider='OpenStreetMap', optimise_choropleth_size=True, coordinates_start=(47.39, 8.53), zoom_start=15, max_n_hectares_to_display=None, plot_boundaries=False, fill_opacity=0.6)` ¶

`create_geo_df_with_hectar_polygons(df, col_value, crs_out='EPSG:4326', crs_in='EPSG:21781', grid_size=100)` ¶

`filter_xy(df, ch_x_min, ch_y_min, ch_x_max, ch_y_max)` ¶

`hectare2xy(df, col_hectare='hectare_id')` ¶