Scatter

`dimensionality_reduction(data, reduction_funcs, n_components, target_column, title='Dimensionality reduction plot', scaler_func='StandardScaler', size=None, show_legend=None, reduction_func_kwargs=None, plotly_kwargs=None, show=True, write_html_path=None)` ¶

Create a scatter plot of the dimensionality reduction representation of the data provided. Multiple dimensionality reduction functions can be used. The data is scaled using the `scaler_func`. If multiple functions are used, the plots are arranged in a grid using `make_subplots` from blitzly ⚡️.

Example:

from blitzly.plots.scatter import dimensionality_reduction
import plotly.express as px

df = px.data.iris()
fig = dimensionality_reduction(
    df,
    n_components=2,
    target_column="species",
    reduction_funcs=["PCA", "TSNE"],
)

Parameters:

Name	Type	Description	Default
`data`	`pd.DataFrame`	Data to plot.	required
`reduction_funcs`	`Union[str, List[str]]`	Dimensionality reduction function(s) to use. The following functions are supported: NMF, PCA, IncrementalPCA, KernelPCA, MiniBatchSparsePCA, SparsePCA, TruncatedSVD, TSNE.	required
`n_components`	`int`	Number of components to use. This parameter is passed to the dimensionality reduction function.	required
`target_column`	`str`	Column to use as the color dimension.	required
`title`	`Optional[str]`	Title of the plot. Defaults to "Dimensionality reduction plot".	`'Dimensionality reduction plot'`
`scaler_func`	`Optional[str]`	Scaler function to use. Defaults to "StandardScaler". The following functions are supported: StandardScaler, MinMaxScaler.	`'StandardScaler'`
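`size`	`Optional[Tuple[int, int]]`	Size of the full plot.	`None`
`show_legend`	`Optional[bool]`	Whether to show the legend.	`None`
`reduction_func_kwargs`	`Optional[dict]`	Additional keyword arguments passed to each dimensionality reduction function.	`None`
`plotly_kwargs`	`Optional[dict]`	Additional keyword arguments passed to the Plotly Express scatter call.	`None`
`show`	`bool`	Whether to show the figure.	`True`
`write_html_path`	`Optional[str]`	The path to which the figure should be written as an HTML file. If None, the figure will not be saved.	`None`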

Source code in blitzly/plots/scatter.py

def dimensionality_reduction(
    data: pd.DataFrame,
    reduction_funcs: Union[str, List[str]],
    n_components: int,
    target_column: str,
    title: str = "Dimensionality reduction plot",
    scaler_func: str = "StandardScaler",
    size: Optional[Tuple[int, int]] = None,
    show_legend: Optional[bool] = None,
    reduction_func_kwargs: Optional[dict] = None,
    plotly_kwargs: Optional[dict] = None,
    show: bool = True,
    write_html_path: Optional[str] = None,
) -> BaseFigure:
    """Create a scatter plot of the dimensionality reduction representation of
    the data provided. Multiple dimensionality reduction functions can be used.
    The data is scaled using the `scaler_func`. If multiple functions are used,
    the plots are arranged in a grid using
    [`make_subplots`](https://invia-flights.github.io/blitzly/plots/subplots/#blitzly.subplots.make_subplots) from blitzly ⚡️.

    Example:
    ```python
    from blitzly.plots.scatter import dimensionality_reduction
    import plotly.express as px

    df = px.data.iris()
    fig = dimensionality_reduction(
        df,
        n_components=2,
        target_column="species",
        reduction_funcs=["PCA", "TSNE"],
    )
    ```

    Args:
        data (pd.DataFrame): Data to plot.
        reduction_funcs (Union[str, List[str]]): Dimensionality reduction function(s) to use. The following functions are supported:
            NMF, PCA, IncrementalPCA, KernelPCA, MiniBatchSparsePCA, SparsePCA, TruncatedSVD, TSNE.
        n_components (int): Number of components to use (2 or 3). This parameter is passed to the dimensionality reduction function.
        target_column (str): Column to use as the color dimension.
        title (Optional[str]): Title of the plot. Defaults to "Dimensionality reduction plot".
            The name of the reduction function is appended to the title of each plot.
        scaler_func (Optional[str]): Scaler function to use. Defaults to "StandardScaler". The following functions are supported:
            StandardScaler, MinMaxScaler.
        size (Optional[Tuple[int, int]]): Size of the full plot.
        show_legend (Optional[bool]): Whether to show the legend. Only honoured when a single plot is drawn.
        reduction_func_kwargs (Optional[dict]): Additional keyword arguments passed to every dimensionality
            reduction function on construction.
        plotly_kwargs (Optional[dict]): Additional keyword arguments passed to `px.scatter` / `px.scatter_3d`.
        show (bool): Whether to show the figure.
        write_html_path (Optional[str]): The path to which the figure should be written as an HTML file.
            If None, the figure will not be saved.

    Returns:
        BaseFigure: The subplot grid for 2 components, or the single 3D scatter plot for 3 components.

    Raises:
        ValueError: If `scaler_func` or one of `reduction_funcs` is not supported, if `reduction_funcs`
            is empty, or if `n_components` is neither 2 nor 3.
        Warning: If `n_components` is 3 and more than one reduction function is requested.
    """

    func_list = [
        "NMF",
        "PCA",
        "IncrementalPCA",
        "KernelPCA",
        "MiniBatchSparsePCA",
        "SparsePCA",
        "TruncatedSVD",
        "TSNE",
    ]
    scaler_list = ["StandardScaler", "MinMaxScaler"]

    if isinstance(reduction_funcs, str):
        reduction_funcs = [reduction_funcs]

    # Validate every argument up front so that no (potentially expensive)
    # reduction is fitted before an invalid argument is reported.
    if scaler_func not in scaler_list:
        raise ValueError(
            f"scaler_func must be one of {scaler_list}! `{scaler_func}` not supported."
        )

    if not reduction_funcs:
        raise ValueError("`reduction_funcs` must contain at least one function!")

    if n_components == 3 and len(reduction_funcs) > 1:
        raise Warning(
            "Cannot plot more than one plot in 3D! Please either set `n_components` to 2 or `reduction_funcs` to one function."
        )

    for func in reduction_funcs:
        if func not in func_list:
            raise ValueError(
                f"reduction_func must be one of {func_list}! `{func}` not supported."
            )

    if n_components not in (2, 3):
        raise ValueError("`n_components` must be 2 or 3!")

    df = check_data(
        data, min_rows=1, min_columns=1, as_pandas=True, only_numerical_values=False
    )

    # Work on the validated frame (not the raw argument) so the scaled features
    # and the colour column below come from the same object.
    numerical_df = df.select_dtypes(include=[int, float])
    scaler = getattr(sk_pre, scaler_func)()
    numerical_df = scaler.fit_transform(numerical_df)

    is_3d = n_components == 3
    scatter_func = px.scatter_3d if is_3d else px.scatter
    # Projection columns are addressed by position.
    axes = {"x": 0, "y": 1, "z": 2} if is_3d else {"x": 0, "y": 1}

    plots = []
    for func in reduction_funcs:
        # TSNE is referenced directly; every other supported reducer is
        # looked up by name on `sk_decomp`.
        method: Callable = TSNE if func == "TSNE" else getattr(sk_decomp, func)

        red_func = method(n_components=n_components, **(reduction_func_kwargs or {}))
        projections = red_func.fit_transform(numerical_df)

        fig = scatter_func(
            projections,
            **axes,
            color=df[target_column],
            labels={"color": target_column},
            **(plotly_kwargs or {}),
        )

        # In 2D the overall size is applied by `make_subplots` below; the single
        # 3D figure is returned as-is, so it has to receive the size here.
        plots.append(
            update_figure_layout(
                fig, title + f" ({func})", size=size if is_3d else None
            )
        )

    if is_3d:
        fig = plots[0]
    else:
        len_plots = len(plots)
        # At most two plots per row.
        shape = ((len_plots + 1) // 2, min(len_plots, 2))
        fig = make_subplots(
            plots,
            shape,
            size=size,
            fill_row=True,
            # The legend is only shown when exactly one plot is drawn.
            show_legend=bool(show_legend) and len_plots == 1,
            show=False,
        )
    return save_show_return(fig, write_html_path, show)

`multi_scatter(data, x_y_columns, modes=None, title='Scatter plot', size=None, show_legend=True, plotly_kwargs=None, show=True, write_html_path=None)` ¶

Create a multi scatter plot. It can be used to visualize the relationship between multiple variables from the same Pandas DataFrame.

Example:

from blitzly.plots.scatter import multi_scatter
import numpy as np
import pandas as pd

random_a = np.linspace(0, 1, 100)
random_b = np.random.randn(100) + 5
random_c = np.random.randn(100)
random_d = np.random.randn(100) - 5
data = np.array([random_a, random_b, random_c, random_d]).T

multi_scatter(
    data=pd.DataFrame(data, columns=["foo", "bar", "blitz", "licht"]),
    x_y_columns=[("foo", "bar"), ("foo", "blitz"), ("foo", "licht")],
    modes=["lines", "markers", "lines+markers"],
    plotly_kwargs={"line": {"color": "black"}},
)

Parameters:

Name	Type	Description	Default
`data`	`pd.DataFrame`	Data to plot. Must be a Pandas DataFrame.	required
`x_y_columns`	`List[Tuple[str, str]]`	List of tuples containing the x and y columns. Those columns will be used for `x` and `y` in the scatter plot. Since it is a multi scatter plot, multiple columns can be used by passing a list of tuples.	required
`modes`	`Optional[List[str]]`	List of modes for the scatter plot. If `None` the `"markers"` mode is used.	`None`
`title`	`str`	Title of the plot.	`'Scatter plot'`
`size`	`Optional[Tuple[int, int]]`	Size of the plot - height and width.	`None`
`show_legend`	`bool`	Whether to show the legend.	`True`
`plotly_kwargs`	`Optional[dict]`	Additional plotly kwargs.	`None`
`show`	`bool`	Whether to show the figure.	`True`
`write_html_path`	`Optional[str]`	The path to which the figure should be written as an HTML file. If None, the figure will not be saved.	`None`

Returns:

Name	Type	Description
`BaseFigure`	`BaseFigure`	The multi scatter plot.

Source code in blitzly/plots/scatter.py

def multi_scatter(
    data: pd.DataFrame,
    x_y_columns: List[Tuple[str, str]],
    modes: Optional[List[str]] = None,
    title: str = "Scatter plot",
    size: Optional[Tuple[int, int]] = None,
    show_legend: bool = True,
    plotly_kwargs: Optional[dict] = None,
    show: bool = True,
    write_html_path: Optional[str] = None,
) -> BaseFigure:
    """Create a multi scatter plot. It can be used to visualize the
    relationship between multiple variables from the same Pandas DataFrame.

    Example:
    ```python
    from blitzly.plots.scatter import multi_scatter
    import numpy as np
    import pandas as pd

    random_a = np.linspace(0, 1, 100)
    random_b = np.random.randn(100) + 5
    random_c = np.random.randn(100)
    random_d = np.random.randn(100) - 5
    data = np.array([random_a, random_b, random_c, random_d]).T

    multi_scatter(
        data=pd.DataFrame(data, columns=["foo", "bar", "blitz", "licht"]),
        x_y_columns=[("foo", "bar"), ("foo", "blitz"), ("foo", "licht")],
        modes=["lines", "markers", "lines+markers"],
        plotly_kwargs={"line": {"color": "black"}},
    )
    ```

    Args:
        data (pd.DataFrame): Data to plot. Must be a Pandas DataFrame.
        x_y_columns (List[Tuple[str, str]]): List of tuples containing the x and y columns.
            Those columns will be used for `x` and `y` in the scatter plot.
            Since it is a multi scatter plot, multiple columns can be used by passing a list of tuples.
            Each trace is named after its y column.
        modes (Optional[List[str]]): List of modes for the scatter plot, one per entry of `x_y_columns`.
            If `None` the `"markers"` mode is used.
        title (str): Title of the plot.
        size (Optional[Tuple[int, int]]): Size of the plot - height and width.
        show_legend (bool): Whether to show the legend.
        plotly_kwargs (Optional[dict]): Additional plotly kwargs, passed to every `go.Scatter` trace.
        show (bool): Whether to show the figure.
        write_html_path (Optional[str]): The path to which the figure should be written as an HTML file.
            If None, the figure will not be saved.

    Returns:
        BaseFigure: The multi scatter plot.

    Raises:
        ValueError: If a column from `x_y_columns` is not in `data`, or if `modes` is given
            and its length differs from the length of `x_y_columns`.
    """

    df = check_data(data, min_rows=1, min_columns=1, as_pandas=True)

    # De-duplicate while keeping first-seen order so the error message is
    # deterministic; a comprehension also accepts list pairs, not only tuples.
    requested = list(dict.fromkeys(col for pair in x_y_columns for col in pair))
    missing = [col for col in requested if col not in df.columns]
    if missing:
        raise ValueError(
            f"""
            Columns {missing} not in `data`!
            All columns passed in `x_y_columns` must be in `data`.
            """
        )

    if modes and len(modes) != len(x_y_columns):
        raise ValueError(
            f"""
            Length of `modes` ({len(modes)}) must be equal to length of `x_y_columns` ({len(x_y_columns)})!
            Or `modes` must be `None`.
            """
        )

    fig = go.Figure()
    for idx, (x_col, y_col) in enumerate(x_y_columns):
        fig.add_trace(
            go.Scatter(
                x=df[x_col],
                y=df[y_col],
                mode=modes[idx] if modes else "markers",
                # Name the trace after the column it actually plots; indexing
                # `df.columns` by trace position labelled traces with unrelated
                # columns and failed when there were more pairs than columns.
                name=y_col,
                **(plotly_kwargs or {}),
            )
        )

    fig = update_figure_layout(fig, title, size, show_legend)
    return save_show_return(fig, write_html_path, show)

`scatter_matrix(data, dimensions=None, color_dim=None, show_upper_half=False, diagonal_visible=False, show_scale=False, title='Scatter matrix', marker_line_color='white', marker_line_width=0.5, marker_color_scale='Plasma', size=None, show_legend=False, show=True, write_html_path=None)` ¶

Create a scatter matrix plot. It can be used to visualize the relationship between multiple variables. The scatter matrix is a grid of scatter plots, one for each pair of variables in the data. The diagonal plots are histograms of the corresponding variables. It is also useful for visualizing the distribution of each variable.

Example:

from blitzly.plots.scatter import scatter_matrix
import numpy as np
import pandas as pd

foo = np.random.randn(1000)
bar = np.random.randn(1000) + 1
blitz = np.random.randint(2, size=1000)
licht = np.random.randint(2, size=1000)
data = np.array([foo, bar, blitz, licht])
df = pd.DataFrame(data.T, columns=["foo", "bar", "blitz", "licht"])

scatter_matrix(
    df,
    dimensions=["foo", "bar", "blitz"],
    color_dim=df["licht"],
    title="My first scatter matrix 🙃",
    show_upper_half=True,
    diagonal_visible=False,
    marker_color_scale="Rainbow",
    marker_line_color="blue",
    size=(500, 500),
)

Parameters:

Name	Type	Description	Default
`data`	`pd.DataFrame`	Data to plot.	required
`dimensions`	`Optional[List[str]]`	List of columns to plot. If `None` all columns from the Pandas DataFrame are used.	`None`
`color_dim`	`Optional[Union[pd.Series, List[str], NDArray]]`	Color dimension. If `None` no color is used.	`None`
`show_upper_half`	`bool`	Show upper half of the scatter matrix.	`False`
`diagonal_visible`	`bool`	Show diagonal part of the matrix.	`False`
`show_scale`	`bool`	Show color scale.	`False`
`title`	`str`	Title of the plot.	`'Scatter matrix'`
`marker_line_color`	`str`	Color of the marker line.	`'white'`
`marker_line_width`	`float`	Width of the marker line.	`0.5`
`marker_color_scale`	`str`	Color scale of the markers.	`'Plasma'`
`size`	`Optional[Tuple[int, int]]`	Size of the plot.	`None`
`show_legend`	`Optional[bool]`	Whether to show the legend.	`False`
`show`	`bool`	Whether to show the figure.	`True`
`write_html_path`	`Optional[str]`	The path to which the figure should be written as an HTML file. If None, the figure will not be saved.	`None`

Returns:

Name	Type	Description
`BaseFigure`	`BaseFigure`	The scatter matrix plot.

Source code in blitzly/plots/scatter.py

def scatter_matrix(
    data: pd.DataFrame,
    dimensions: Optional[List[str]] = None,
    color_dim: Optional[Union[pd.Series, List[str], NDArray]] = None,
    show_upper_half: bool = False,
    diagonal_visible: bool = False,
    show_scale: bool = False,
    title: str = "Scatter matrix",
    marker_line_color: str = "white",
    marker_line_width: float = 0.5,
    marker_color_scale: str = "Plasma",
    size: Optional[Tuple[int, int]] = None,
    show_legend: Optional[bool] = False,
    show: bool = True,
    write_html_path: Optional[str] = None,
) -> BaseFigure:
    """Create a scatter matrix plot. It can be used to visualize the
    relationship between multiple variables. The scatter matrix is a grid of
    scatter plots, one for each pair of variables in the data. The diagonal
    cells are only drawn when `diagonal_visible` is `True`, and the upper
    half only when `show_upper_half` is `True`.

    Example:
    ```python
    from blitzly.plots.scatter import scatter_matrix
    import numpy as np
    import pandas as pd

    foo = np.random.randn(1000)
    bar = np.random.randn(1000) + 1
    blitz = np.random.randint(2, size=1000)
    licht = np.random.randint(2, size=1000)
    data = np.array([foo, bar, blitz, licht])
    df = pd.DataFrame(data.T, columns=["foo", "bar", "blitz", "licht"])

    scatter_matrix(
        df,
        dimensions=["foo", "bar", "blitz"],
        color_dim=df["licht"],
        title="My first scatter matrix 🙃",
        show_upper_half=True,
        diagonal_visible=False,
        marker_color_scale="Rainbow",
        marker_line_color="blue",
        size=(500, 500),
    )
    ```

    Args:
        data (pd.DataFrame): Data to plot.
        dimensions (Optional[List[str]], optional): List of columns to plot. If `None` all columns from the Pandas DataFrame are used.
        color_dim (Optional[Union[pd.Series, List[str], NDArray]]): Color dimension. If `None` no color is used.
            A NumPy array must be 1-dimensional.
        show_upper_half (bool): Show upper half of the scatter matrix.
        diagonal_visible (bool): Show diagonal part of the matrix.
        show_scale (bool): Show color scale.
        title (str): Title of the plot.
        marker_line_color (str): Color of the marker line.
        marker_line_width (float): Width of the marker line.
        marker_color_scale (str): Color scale of the markers.
        size (Optional[Tuple[int, int]]): Size of the plot.
        show_legend (Optional[bool]): Whether to show the legend.
        show (bool): Whether to show the figure.
        write_html_path (Optional[str]): The path to which the figure should be written as an HTML file.
            If None, the figure will not be saved.

    Returns:
        BaseFigure: The scatter matrix plot.

    Raises:
        ValueError: If `color_dim` is a NumPy array with more than one dimension.
    """

    if isinstance(color_dim, np.ndarray) and color_dim.ndim != 1:
        raise ValueError("`color_dim` must be a 1-dimensional array!")

    # Validate before touching `data`, so invalid input is reported by the
    # validator instead of failing on `data.columns` / `data[dim]` below.
    # The return value is intentionally unused; the call is only a check.
    check_data(data, min_rows=2, min_columns=2)

    if dimensions is None:
        dimensions = list(data.columns)

    splom_dimensions = [{"label": dim, "values": data[dim]} for dim in dimensions]

    fig = go.Figure(
        data=go.Splom(
            dimensions=splom_dimensions,
            showupperhalf=show_upper_half,
            marker={
                "colorscale": marker_color_scale,
                "color": color_dim,
                "showscale": show_scale,
                "line_color": marker_line_color,
                "line_width": marker_line_width,
            },
            diagonal={"visible": diagonal_visible},
        )
    )

    fig = update_figure_layout(fig, title, size, show_legend)
    return save_show_return(fig, write_html_path, show)

Scatter

dimensionality_reduction(data, reduction_funcs, n_components, target_column, title='Dimensionality reduction plot', scaler_func='StandardScaler', size=None, show_legend=None, reduction_func_kwargs=None, plotly_kwargs=None, show=True, write_html_path=None) ¶

multi_scatter(data, x_y_columns, modes=None, title='Scatter plot', size=None, show_legend=True, plotly_kwargs=None, show=True, write_html_path=None) ¶

scatter_matrix(data, dimensions=None, color_dim=None, show_upper_half=False, diagonal_visible=False, show_scale=False, title='Scatter matrix', marker_line_color='white', marker_line_width=0.5, marker_color_scale='Plasma', size=None, show_legend=False, show=True, write_html_path=None) ¶

`dimensionality_reduction(data, reduction_funcs, n_components, target_column, title='Dimensionality reduction plot', scaler_func='StandardScaler', size=None, show_legend=None, reduction_func_kwargs=None, plotly_kwargs=None, show=True, write_html_path=None)` ¶

`multi_scatter(data, x_y_columns, modes=None, title='Scatter plot', size=None, show_legend=True, plotly_kwargs=None, show=True, write_html_path=None)` ¶

`scatter_matrix(data, dimensions=None, color_dim=None, show_upper_half=False, diagonal_visible=False, show_scale=False, title='Scatter matrix', marker_line_color='white', marker_line_width=0.5, marker_color_scale='Plasma', size=None, show_legend=False, show=True, write_html_path=None)` ¶