Source code for ccxt_pandas.calculations.orderbook

"""Order book analysis and depth calculations."""

import pandas as pd



[docs]
def create_mirrored_sides(
    data: pd.DataFrame, sides: tuple[str, str] = ("buy", "sell")
) -> pd.DataFrame:
    """Create mirrored order book sides for testing or simulation.

    Takes a DataFrame and duplicates it for each side, useful for creating
    synthetic order books or testing strategies with symmetric orders.

    Args:
        data: DataFrame with order data (price, qty, etc.)
        sides: Tuple of side values to create (default: ("buy", "sell"))

    Returns:
        DataFrame with rows duplicated for each side

    Examples:
        >>> # Create buy and sell orders from a single set
        >>> orders = pd.DataFrame({'price': [100, 101], 'qty': [1.0, 2.0]})
        >>> mirrored = create_mirrored_sides(orders)
        >>> print(mirrored)
           price  qty  side
        0    100  1.0   buy
        1    101  2.0   buy
        0    100  1.0  sell
        1    101  2.0  sell

    Notes:
        - Useful for creating symmetric order books
        - Does not modify price levels (you may want to mirror prices around mid)
        - Original index is preserved (use ignore_index=True in concat if needed)
    """
    dfs = []
    for side in sides:
        side_df = data.copy()
        side_df["side"] = side
        dfs.append(side_df)
    return pd.concat(dfs, ignore_index=True)




[docs]
def is_ask_side(data: pd.DataFrame) -> pd.Series:
    """Identify ask side rows in order book.

    Handles both order book format ('asks'/'bids') and order format ('sell'/'buy').

    Args:
        data: DataFrame with 'side' column

    Returns:
        Boolean Series where True indicates ask/sell side

    Examples:
        >>> orderbook = pd.DataFrame({'side': ['bids', 'asks', 'bids']})
        >>> is_ask_side(orderbook)
        0    False
        1     True
        2    False
        dtype: bool

        >>> orders = pd.DataFrame({'side': ['buy', 'sell', 'buy']})
        >>> is_ask_side(orders)
        0    False
        1     True
        2    False
        dtype: bool

    Notes:
        - Returns True for 'asks' or 'sell'
        - Returns False for 'bids' or 'buy'
    """
    # Handle both orderbook sides (asks/bids) and order sides (sell/buy)
    return data["side"].isin(["asks", "sell"])




[docs]
def side_sign(data: pd.DataFrame) -> pd.Series:
    """Get directional sign for order book sides.

    Returns +1 for asks/sell (taking liquidity), -1 for bids/buy (providing liquidity).

    Args:
        data: DataFrame with 'side' column

    Returns:
        Series with +1 for asks/sell, -1 for bids/buy

    Examples:
        >>> orderbook = pd.DataFrame({'side': ['bids', 'asks', 'bids']})
        >>> side_sign(orderbook)
        0   -1
        1    1
        2   -1
        dtype: int64

    Notes:
        - Useful for signed price calculations
        - Convention: asks = +1, bids = -1
    """
    return 2 * is_ask_side(data).astype(int) - 1




[docs]
def signed_price(data: pd.DataFrame, price_col: str = "price") -> pd.Series:
    """Calculate signed price based on side.

    Multiplies price by side sign (+1 for asks, -1 for bids), useful for
    sorting order books or calculating spreads.

    Args:
        data: DataFrame with 'side' and price columns
        price_col: Name of price column (default: 'price')

    Returns:
        Series with signed prices

    Examples:
        >>> orderbook = pd.DataFrame({
        ...     'side': ['bids', 'asks'],
        ...     'price': [99.5, 100.5]
        ... })
        >>> signed_price(orderbook)
        0    -99.5
        1    100.5
        dtype: float64

    Notes:
        - Bids get negative prices
        - Asks get positive prices
        - Useful for sorting: best bid (highest) and best ask (lowest) both sort first
    """
    return side_sign(data) * data[price_col]




[docs]
def sort_orderbook(
    data: pd.DataFrame,
    by_symbol: bool = True,
    by_exchange: bool = False,
    price_col: str = "price",
) -> pd.DataFrame:
    """Sort order book by symbol, side, and price levels.

    Sorts so that best bid (highest price) and best ask (lowest price) appear
    first within each symbol/exchange group.

    Args:
        data: Order book DataFrame with 'symbol', 'side', and price columns
        by_symbol: Include symbol in sort (default: True)
        by_exchange: Include exchange in sort (default: False)
        price_col: Name of price column (default: 'price')

    Returns:
        Sorted DataFrame with best prices first

    Examples:
        >>> orderbook = pd.DataFrame({
        ...     'symbol': ['BTC/USDT', 'BTC/USDT', 'BTC/USDT'],
        ...     'side': ['asks', 'bids', 'asks'],
        ...     'price': [101, 99, 100],
        ...     'qty': [1.0, 2.0, 1.5]
        ... })
        >>> sorted_ob = sort_orderbook(orderbook)
        >>> # Result: bids@99 (best bid), asks@100 (best ask), asks@101

    Notes:
        - Uses signed price for correct sorting
        - Best bid = highest bid price = most negative signed price
        - Best ask = lowest ask price = least positive signed price
        - Reset index after sort (ignore_index=True)
    """
    result = data.copy()
    result["_signed_price"] = signed_price(result, price_col)

    sort_cols = []
    if by_exchange:
        sort_cols.append("exchange")
    if by_symbol:
        sort_cols.append("symbol")
    sort_cols.extend(["side", "_signed_price"])

    result = result.sort_values(sort_cols, ignore_index=True)
    return result.drop(columns=["_signed_price"])




[docs]
def calculate_notional(
    data: pd.DataFrame, price_col: str = "price", qty_col: str = "qty"
) -> pd.Series:
    """Calculate notional value (price × quantity).

    Args:
        data: DataFrame with price and quantity columns
        price_col: Name of price column (default: 'price')
        qty_col: Name of quantity column (default: 'qty')

    Returns:
        Series with notional values

    Examples:
        >>> orderbook = pd.DataFrame({'price': [100, 101], 'qty': [1.5, 2.0]})
        >>> calculate_notional(orderbook)
        0    150.0
        1    202.0
        dtype: float64

    Notes:
        - Notional = price × quantity
        - Represents total value at each price level
        - Used in VWAP and depth calculations
    """
    return data[price_col] * data[qty_col]




[docs]
def calculate_vwap_by_depth(
    data: pd.DataFrame,
    depths: list | tuple | set,
    group_by: list[str] | None = None,
    price_col: str = "price",
    qty_col: str = "qty",
) -> pd.DataFrame:
    """Calculate Volume-Weighted Average Price (VWAP) at various depth levels.

    Computes the average execution price for buying/selling up to a certain
    notional depth. Useful for estimating market impact and slippage.

    Args:
        data: Order book DataFrame with price, qty, symbol, and side columns
        depths: List of notional depths to calculate VWAP for (e.g., [1000, 5000, 10000])
        group_by: Columns to group by (default: ['symbol', 'side'] + 'exchange' if present)
        price_col: Name of price column (default: 'price')
        qty_col: Name of quantity column (default: 'qty')

    Returns:
        DataFrame with columns: [*group_by, depth, qty, notional, price]
        where 'price' is the VWAP at each depth level

    Examples:
        >>> # Calculate VWAP for buying $1000 and $5000 worth
        >>> orderbook = pd.DataFrame({
        ...     'symbol': ['BTC/USDT'] * 4,
        ...     'side': ['asks'] * 4,
        ...     'price': [100, 101, 102, 103],
        ...     'qty': [10, 10, 10, 10]
        ... })
        >>> vwap = calculate_vwap_by_depth(orderbook, depths=[500, 1500])
        >>> # Shows average price to buy $500 worth (≈100) and $1500 worth (≈101)

    Notes:
        - VWAP = total notional / total quantity
        - Accounts for partial fills at the last price level
        - Order book should be pre-sorted (use sort_orderbook first)
        - Higher depths = more market impact = higher VWAP for asks, lower for bids
        - Useful for:
          - Market impact analysis
          - Slippage estimation
          - Liquidity assessment
          - Order size optimization
    """
    if group_by is None:
        group_by = ["symbol", "side"]
        if "exchange" in data.columns:
            group_by.append("exchange")

    result = data.copy()

    # Calculate notional value at each level
    result["notional"] = calculate_notional(result, price_col, qty_col)

    # Calculate cumulative notional within each group
    result["cum_notional"] = result.groupby(group_by)["notional"].cumsum()

    # Create a row for each depth level
    depth_dfs = []
    for depth in depths:
        depth_df = result.copy()
        depth_df["depth"] = depth
        depth_dfs.append(depth_df)

    # Combine all depth levels
    all_depths = pd.concat(depth_dfs, ignore_index=True)

    # Filter to rows within depth (cum_notional - notional <= depth)
    # This includes the last level that crosses the depth threshold
    within_depth = all_depths.query("(cum_notional - notional) <= depth").copy()

    group_by_with_depth = group_by + ["depth"]

    # Adjust notional for the last level (partial fill)
    # If cum_notional > depth, only use (depth - previous_cum_notional)
    within_depth["notional"] = within_depth["notional"].where(
        within_depth["cum_notional"] <= within_depth["depth"],
        other=(
            within_depth["depth"]
            - within_depth.groupby(group_by_with_depth)["cum_notional"].shift(1).fillna(0)
        ),
    )

    # Recalculate qty based on adjusted notional
    within_depth[qty_col] = within_depth["notional"] / within_depth[price_col]

    # Aggregate by group and depth
    vwap = within_depth.groupby(group_by_with_depth, as_index=False)[[qty_col, "notional"]].sum()

    # Calculate VWAP
    vwap[price_col] = vwap["notional"] / vwap[qty_col]

    return vwap




[docs]
def calculate_mid_price_and_spread(
    data: pd.DataFrame,
    price_col: str = "price",
    by_exchange: bool = False,
) -> pd.DataFrame:
    """Calculate mid price, spread, and relative spread from order book for all symbols.

    Computes best bid/ask, mid price, absolute spread, and relative spread for each
    symbol (and optionally exchange) using pandas pivot for efficient calculation.

    Args:
        data: Order book DataFrame (should be sorted)
        price_col: Name of price column (default: 'price')
        by_exchange: Include exchange in grouping (default: False)

    Returns:
        DataFrame with columns: bid, ask, mid_price, spread, relative_spread
        Indexed by symbol (and optionally exchange)

    Examples:
        >>> orderbook = pd.DataFrame({
        ...     'symbol': ['BTC/USDT', 'BTC/USDT', 'ETH/USDT', 'ETH/USDT'],
        ...     'side': ['bids', 'asks', 'bids', 'asks'],
        ...     'price': [99.5, 100.5, 1800, 1820],
        ...     'qty': [10, 10, 5, 5]
        ... })
        >>> result = calculate_mid_price_and_spread(orderbook)
        >>> print(result)
                    bid     ask  mid_price  spread  relative_spread
        symbol
        BTC/USDT   99.5   100.5      100.0     1.0         0.010000
        ETH/USDT  1800.0  1820.0     1810.0    20.0         0.011050

        >>> # Access specific columns
        >>> result['mid_price']
        symbol
        BTC/USDT     100.0
        ETH/USDT    1810.0

        >>> result['spread']
        symbol
        BTC/USDT     1.0
        ETH/USDT    20.0

    Notes:
        - Order book should be sorted first (use sort_orderbook)
        - Uses pandas pivot for efficient calculation
        - mid_price = (bid + ask) / 2
        - spread = ask - bid (absolute)
        - relative_spread = spread / mid_price (as decimal, multiply by 100 for %)
        - Tighter spreads indicate higher liquidity
    """
    group_cols = (
        ["exchange", "symbol"] if by_exchange and "exchange" in data.columns else ["symbol"]
    )
    result = data.copy()
    result["side"] = result["side"].map({"asks": "ask", "bids": "bid"})
    result = result.drop_duplicates(subset=group_cols + ["side"], keep="first").pivot(
        index=group_cols, columns="side", values=price_col
    )
    result["mid_price"] = (result["bid"] + result["ask"]) / 2
    result["spread"] = result["ask"] - result["bid"]
    result["relative_spread"] = result["spread"] / result["mid_price"]
    return result[["bid", "ask", "mid_price", "spread", "relative_spread"]]