Smarr et al. 2017 Mouse Data
I recently presented a poster at BMES 2025—a biomedical engineering conference—and listened to a great deal of talks on research being done in the field. Many many amazing talks, but my favorite was “Engineering Methods for Fitting Digital Dynamical Manifolds for Human Health AI”, by Dr. Benjamin Smarr. Afterwards, I looked up his lab and found a fun challenge:
I feel strongly about being supportive of people from all backgrounds; anyone with genuine interest can succeed, whether you have programming experience or not. BUT, you can’t do research here without programming. So please check out these two papers (paper 1, paper 2) and these data, and make sure you can recapitulate some of the analyses before reaching out about a position. You won’t be graded, but we will discuss it; if this is too hard, or if going through it feels so boring that you don’t, then you will have difficulty doing research here.
It sounded like a fun challenge, so I decided to attempt said recapitulation. I downloaded the linked data so I could analyze it in python.
import pandas as pd
df_dict = {
'fLA': pd.read_csv('micedata-FemAct.csv'),
'fCBT': pd.read_csv('micedata-FemTemp.csv'),
'mLA': pd.read_csv('micedata-MaleAct.csv'),
'mCBT': pd.read_csv('micedata-MaleTemp.csv')
}Raw Data
First I’ll plot the temperatuere data.
import numpy as np
import matplotlib.pyplot as plt
def plot_raw_temp(data, columns, title, color='blue'):
fig, ax = plt.subplots(figsize=(12, 6))
for col in columns:
ax.plot(data['time (min)'], data[col], label=col, alpha=0.5)
ax.set_xlabel('Time (min)')
ax.set_ylabel('Temperature (°C)')
ax.set_title(title)
ax.legend(bbox_to_anchor=(1.0, 1), loc='upper left')
ax.grid(True, alpha=0.3)
xticks = np.arange(0, max(data['time (min)']) + 1, 1440)
ax.set_xticks(xticks)
plt.tight_layout()
plt.show()
# Plot female temperatures
fem_data_columns = [f'fem{n}' for n in range(1, 15)]
plot_raw_temp(df_dict['fCBT'], fem_data_columns, 'Female Temperature Data - All Traces')
# Plot male temperatures
male_data_columns = [f'male{n}' for n in range(1, 7)]
plot_raw_temp(df_dict['mCBT'], male_data_columns, 'Male Temperature Data - All Traces')The first linked paper explains that:
For CBT, values below 35 °C were set to 35 °C to remove the result of a few rare device malfunctions and all values more than three standard deviations from the mean were set to three standard deviations from the mean. For LA, the correction to three standard deviations was only applied in the positive direction so that erroneously high values were corrected while activity counts of “0” were not inflated.
Based on the cuttofs in the male data, it appears these corrections have already been applied. This can be quicky verified as well:
# Check Female Temperature data
fem_temp_data = df_dict['fCBT'].drop(columns=['time (min)'])
fem_low_values = (fem_temp_data < 35).any().any()
print(f'fCBT has values < 35: {fem_low_values}')
# Check Male Temperature data
male_temp_data = df_dict['mCBT'].drop(columns=['time (min)'])
male_low_values = (male_temp_data < 35).any().any()
print(f'mCBT has values < 35: {male_low_values}')Now, look at the activity data.
# Plot female activity traces in a grid
fig, axes = plt.subplots(5, 3, figsize=(15, 18), sharey=True)
axes = axes.flatten()
fem_data_columns = [f'fem{n}' for n in range(1, 15)]
time_data = df_dict['fLA']['time (min)']
for i, col in enumerate(fem_data_columns):
axes[i].plot(time_data, df_dict['fLA'][col], linewidth=0.8)
axes[i].set_title(f'{col}', fontsize=10)
axes[i].grid(True, alpha=0.3)
# Set x-axis ticks every 1440 minutes (24 hours)
axes[i].set_xticks(np.arange(0, max(time_data) + 1, 1440))
axes[i].set_xlabel('Time (min)', fontsize=9)
if i % 3 == 0: # Left column
axes[i].set_ylabel('Activity\n(events/min)', fontsize=9)
# Hide the last empty subplot (index 14)
axes[14].set_visible(False)
plt.suptitle('Female Locomotor Activity - Individual Traces', fontsize=14, y=0.995)
plt.tight_layout()
plt.show()
# Plot male activity traces in a grid
fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharey=True)
axes = axes.flatten()
male_data_columns = [f'male{n}' for n in range(1, 7)]
time_data = df_dict['mLA']['time (min)']
for i, col in enumerate(male_data_columns):
axes[i].plot(time_data, df_dict['mLA'][col], linewidth=0.8)
axes[i].set_title(f'{col}', fontsize=10)
axes[i].grid(True, alpha=0.3)
# Set x-axis ticks every 1440 minutes (24 hours)
axes[i].set_xticks(np.arange(0, max(time_data) + 1, 1440))
axes[i].set_xlabel('Time (min)', fontsize=9)
if i % 3 == 0: # Left column
axes[i].set_ylabel('Activity\n(events/min)', fontsize=9)
plt.suptitle('Male Locomotor Activity - Individual Traces', fontsize=14, y=0.98)
plt.tight_layout()
plt.show()For LA, the correction to three standard deviations was only applied in the positive direction so that erroneously high values were corrected while activity counts of “0” were not inflated.
The peaks on many of these plot look chopped off. While the original mean and standard deviation can’t be calculated from chopped data, I’ll assume this was already corrected as well.
Next, the paper says:
Daily range for each modality was defined as (max-min) per mouse per day. Median 4-day windows (i.e., cycles in females) were generated for each animal by taking the average of the three repeated cycles, followed by taking the average of these 4-day windows across individuals of the same sex. Inter-animal variability was defined as the population range for each modality’s daily range.
From the plots above, the data begins right as CBT and LA increase, so it’s reasonable to assume the data has also been aligned to the beginning of a day.
Since the data is already cleaned, I can move to creating the four day averages described.
Four Day Averages
min_4_days = 5760
def process_df(df):
remove_time_col = list(df.columns)[1:]
data_df = df[remove_time_col].copy()
top_half = data_df.iloc[:min_4_days].values
bottom_half = data_df.iloc[min_4_days:].values
averaged_halves = (top_half + bottom_half) / 2
final_result_array = averaged_halves.mean(axis=1)
return final_result_array
processed_dict = {'time (min)': np.arange(1, 5761)}
for name, df in df_dict.items():
four_day_average = process_df(df)
processed_dict[name] = four_day_average
processed_df = pd.DataFrame(processed_dict)
time = processed_df['time (min)']
# Temperature plot
plt.figure(figsize=(12, 5))
plt.plot(time, processed_df['fCBT'], label='Female CBT', color='C0', linewidth=1.5, alpha=0.8)
plt.plot(time, processed_df['mCBT'], label='Male CBT', color='C1', linewidth=1.5, alpha=0.8)
plt.ylabel('Temperature (°C)')
plt.title('Four-day averages: Temperature')
plt.legend()
plt.grid(True, which='both', axis='x', linestyle=':', linewidth=0.5)
plt.xticks(np.arange(0, max(time) + 1, 1440))
plt.tight_layout()
plt.show()
# Activity plot
plt.figure(figsize=(12, 5))
plt.plot(time, processed_df['fLA'], label='Female LA', color='C2', linewidth=1)
plt.plot(time, processed_df['mLA'], label='Male LA', color='C3', linewidth=1, alpha=0.8)
plt.xlabel('Time (min)')
plt.ylabel('Activity (events per minute)')
plt.title('Four-day averages: Activity')
plt.legend()
plt.grid(True, which='both', axis='x', linestyle=':', linewidth=0.5)
plt.xticks(np.arange(0, max(time) + 1, 1440))
plt.tight_layout()
plt.show()For wavelet transforms, I’ll use ssqueezepy.
Wavelet Analysis
from ssqueezepy import Wavelet, cwt, imshow
from ssqueezepy.experimental import scale_to_freq
from matplotlib.colors import LinearSegmentedColormap
def morse_cwt(
x,
beta=5,
gamma=3,
scale_type='log',
sample_rate=60,
wavelets_per_octave=32
):
x = np.asarray(x)
num_samples = len(x)
wavelet = Wavelet(('gmw', {'beta': beta, 'gamma': gamma}))
Wx, scales = cwt(
x,
wavelet,
scales=scale_type,
fs=sample_rate,
nv=wavelets_per_octave
)
freqs = scale_to_freq(scales, wavelet, num_samples, fs=sample_rate)
t = np.linspace(0., num_samples/sample_rate, num_samples)
return Wx, freqs, t
def plot_scalogram(W, freqs, t, title):
periods = 1 / freqs # Convert frequency to period
W_flipped = np.flipud(W)
periods_flipped = periods[::-1]
# Make scalogram
fig, ax = plt.subplots(figsize=(10, 6))
imshow_kwargs = dict(
abs=1, xticks=t, xlabel="Time", ylabel="Period",
title=title
)
colors = ['cyan', 'blue', 'red', 'white']
positions = [0, 0.1, 0.5, 1.0]
n_bins = 256
custom_cmap = LinearSegmentedColormap.from_list(
'cyan_blue_red_white',
list(zip(positions, colors)),
N=n_bins
)
imshow(W_flipped, **imshow_kwargs, yticks=periods_flipped, ax=ax, show=0, cmap=custom_cmap)
im = ax.get_images()[0]
cbar = plt.colorbar(im, ax=ax, orientation='horizontal', pad=0.1, shrink=0.8)
cbar.set_label('Magnitude')
plt.tight_layout()
plt.show()
for col in ['fLA', 'fCBT', 'mLA', 'mCBT']:
W, freqs, t = morse_cwt(processed_df[col].to_numpy())
plot_scalogram(W, freqs, t, col)