USA equity market#

In 1926 the Standard Statistics Company developed a 90-stock index. On March 4, 1957, Standard and Poor’s - the company formed by the merger of Standard Statistics and Poor’s Publishing - expanded the index to the current number of 500 companies and renamed it the S&P 500 Stock Composite Index.

On August 31, 1976, The Vanguard Group launched the first mutual fund available to retail investors that tracked the index.

Table of contents.

  • Return over different time frames, both in nominal and real value

  • Sectors

Hide code cell source
import numpy as np

import pandas as pd
# from IPython.display import display  # import not required, maybe

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import plotly.io as pio
#> Mount GDrive if required - for Colab editing
from pathlib import Path

# Detect Colab and mount Drive if needed
# The import of google.colab doubles as the environment check: an ImportError
# is treated as "not running in Colab". The Plotly default renderer is chosen
# in the same branch ("colab" vs "iframe").
try:
    from google.colab import drive
    IN_COLAB = True
    pio.renderers.default = "colab"
except ImportError:
    IN_COLAB = False
    pio.renderers.default = "iframe"

if IN_COLAB:
    drive.mount('/content/drive')
    # IPython magic (not plain Python): move the working directory to this
    # chapter's folder on the mounted Drive, so Path.cwd() below points there.
    %cd /content/drive/MyDrive/basics-books/repos/financial-edu/bbooks-financial-edu/ch/history

# Current working directory of the notebook; later cells build data paths
# relative to it.
FILE_ROOT = Path.cwd()
# DATA_DIR = REPO_ROOT / 'data'

# NOTE(review): the label says "Repo root", but the value printed is the
# current working directory (FILE_ROOT) - confirm the label is intended.
print("Repo root:", FILE_ROOT)
# print("Data dir:", DATA_DIR)
Repo root: /home/davide/Documents/basics-books/books/bbooks-financial-edu/ch/history

Return#

  • Time history of the total return index

  • Return over different time spans: probability function and statistics, composition of returns

  • Volatility: drag, volatility clusters,…

Total Return - Shiller data#

Shiller data, from 1871 to 2025

Hide code cell source
#> Location of the Shiller workbook, relative to the notebook folder
shiller_filen = str(FILE_ROOT) + "/../../code/data/shiller/ie_data.xlsx"

#> Read the "Data" sheet (skipping the first 7 rows) and discard the last
#> row, which is not a data record
df = pd.read_excel(shiller_filen, sheet_name="Data", skiprows=7).iloc[:-1].copy()

#> Turn the numeric Date column into datetimes.
#> Formatting with two decimals first means a value such as 1871.1 becomes
#> "1871.10", which "%Y.%m" then reads as month 10.
df["Date"] = pd.to_datetime(df["Date"].map("{:.2f}".format), format="%Y.%m")

#> Columns used in the following cells
# P:       S&P Comp. P
# Price:   Real Price
# Price.1: Real Total Return Price
Hide code cell source
#> S&P 500 index (even if the S&P500 was created in 1957; blended with some other index?)
#> Overview figure: 2x2 grid of time series, every y axis on a log scale
panel_titles = (
    "S&P500 Index", "CPI",
    "S&P500 Index - Real", "S&P500 Total Return - Real",
)

# One entry per panel: (row, col, dataframe column, trace name)
panels = (
    (1, 1, "P", "S&P 500"),
    (1, 2, "CPI", "CPI"),
    (2, 1, "Price", "Real S&P500 Index"),
    (2, 2, "Price.1", "Real Total Return"),
)

fig = make_subplots(
    rows=2,
    cols=2,
    shared_xaxes=True,
    vertical_spacing=0.1,
    subplot_titles=panel_titles,
)

for panel_row, panel_col, column, trace_name in panels:
    series = go.Scatter(
        x=df["Date"],
        y=df[column],
        name=trace_name,
        line={"color": "black"},
    )
    fig.add_trace(series, row=panel_row, col=panel_col)
    fig.update_yaxes(type="log", row=panel_row, col=panel_col)

# Subplot titles already identify each curve, so the legend is hidden
fig.update_layout(showlegend=False)

fig.show()
Hide code cell source
#> Monthly returns of the real total-return series (Price.1)
df["return_month"] = df["Price.1"].pct_change()
# Sample mean and variance; only needed by the optional Gaussian overlay below
mu, sig2 = df["return_month"].mean(), df["return_month"].var()

#> Statistics
print(df["return_month"].describe())
print(f"Skewness: {df['return_month'].skew()}")
print(f"Kurtosis: {df['return_month'].kurtosis()}")

#> Returns without missing values (pct_change leaves the first row empty);
#> computed once and shared by both histograms
monthly_returns = df["return_month"].dropna()

#> Plots: (2,1) subplot: density and cumulative probability
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.1,
    # subplot_titles=("Monthly Returns Probability Density", "Cumulative Probability")
)

# Top: probability density histogram.
# 'probability density' divides each bin's probability by the bin width, so
# the bars integrate to 1. This matches the "Density" axis label and the
# figure title, and makes the bars directly comparable with a pdf curve.
# (The previous 'probability' setting plotted per-bin probability mass.)
fig.add_trace(
    go.Histogram(
        x=monthly_returns,
        # nbinsx=50,
        histnorm='probability density',
        name='Density',
        # marker_color='steelblue',
        opacity=0.7
    ),
    row=1, col=1
)

# Optional overlay: normal pdf with the sample mean and variance
# x_plot = np.linspace(monthly_returns.min(), monthly_returns.max(), 100)
# fig.add_trace(
#     go.Scatter(
#         x=x_plot,
#         y=np.exp(-(x_plot-mu)**2/(2.*sig2)) / np.sqrt(2*np.pi*sig2)
#     ),
#     row=1, col=1
# )

# Bottom: cumulative histogram; 'probability' makes the last bar reach 1
fig.add_trace(
    go.Histogram(
        x=monthly_returns,
        # nbinsx=50,
        cumulative_enabled=True,
        histnorm='probability',
        name='Cumulative',
        # marker_color='orange',
        opacity=0.7
    ),
    row=2, col=1
)

# Layout tweaks
fig.update_layout(
    title="S&P 500 Monthly Returns: Probability Density & Cumulative Probability",
    # width=800, height=600,
    # xaxis_title="Monthly Return",
    yaxis_title="Density",
    xaxis2_title="Monthly Return",
    yaxis2_title="Cumulative Probability",
    template="plotly_white",
    bargap=0.1,
    showlegend=False
)

fig.show()
count    1854.000000
mean        0.006515
std         0.040827
min        -0.261879
25%        -0.013347
50%         0.009404
75%         0.029377
max         0.524294
Name: return_month, dtype: float64
Skewness: 0.5564109443798456
Kurtosis: 17.94821481233623

Sectors#

Message: market composition changes - rise and fall of dominant sectors; diversification can disappear as market concentration in one or few sectors increases; survivorship and return bias.

Methods:

  • Evolution of largest companies of a time range over longer time ranges

  • SPDR sector ETFs, available since 1998/12

References:

References#