Chapter 13
Table of Contents
Data
Chapter 13 uses the same data as for Chapter 12
Alternatively, individual files can be found in the Data section.
Code from chapter
'''
Find the peaks of the reduction and oxidation waves in a CV experiment
and calculate E_1/2
Requires: CV data in a .csv file with reduction first then oxidation
Written by: Chris Johnson and Ben Lear (authors@codechembook.com)
v1.0.0 - 250322 - initial version
v1.1.0 - 250323 - refactored to make AnalyzeCV function importable
'''
import numpy as np
import scipy.signal as sps
from plotly.subplots import make_subplots
from codechembook.quickTools import quickOpenFilename, quickPopupMessage
from codechembook.symbols.greek import Delta
from codechembook.symbols.typesettingHTML import textsub, textit
def SplitWaves(E, I):
    '''
    Split E and I arrays into separate reduction and oxidation E and I arrays.

    REQUIRED PARAMETERS:
        E (ndarray): potential data points (reduction sweep first, then oxidation)
        I (ndarray): current data points

    RETURNS:
        (ndarray): E_red, reduction wave potential data points
        (ndarray): I_red, reduction wave current data points
        (ndarray): E_ox, oxidation wave potential data points
        (ndarray): I_ox, oxidation wave current data points
    '''
    # Find the index of the scan turnaround: the first point where E stops decreasing
    i = 1
    while i < len(E) and E[i] < E[i-1]:
        i += 1
    # Reduction wave is everything before the turnaround
    E_red, I_red = E[:i], I[:i]
    if i >= len(E):
        # No turnaround found: the whole scan was a reduction sweep.
        # (The original indexed E[i] here and raised IndexError.)
        return E_red, I_red, E[i:], I[i:]
    if E[i] == E[i-1]:
        # Special case: the turnaround potential is duplicated.  The loop stops
        # on the SECOND copy, so the duplicate must be detected by comparing
        # backwards (the original compared E[i] with E[i+1], which both missed
        # the duplicate and could index past the end of the array).
        E_ox, I_ox = E[i:], I[i:]
    else:
        # Single turnaround point: include it in the oxidation wave too
        E_ox, I_ox = E[i-1:], I[i-1:]
    return E_red, I_red, E_ox, I_ox
def AnalyzeCV(E_red, I_red, E_ox, I_ox):
    '''
    Find the peaks of reduction and oxidation waves and get E_1/2.

    REQUIRED PARAMS:
        E_red (ndarray): reduction wave E points
        I_red (ndarray): reduction wave I points
        E_ox (ndarray): oxidation wave E points
        I_ox (ndarray): oxidation wave I points

    RETURNS:
        (float): E_pa, the potential of the reduction-wave peak
        (float): E_pc, the potential of the oxidation-wave peak
        (float): delta_E, the difference between the peaks
        (float): E1_2, the half-wave (reduction) potential
        (ndarray): E_pa_index, indices of peaks found in the reduction wave
        (ndarray): E_pc_index, indices of peaks found in the oxidation wave

    NOTE(review): by convention the reduction-sweep peak is the cathodic peak;
    the E_pa/E_pc labels here follow the original code and may be swapped --
    confirm against the book text.
    '''
    # Find peaks in the reduction and oxidation regions.  The reduction
    # current is negated so its (negative-going) peak appears as a maximum.
    # The prominence threshold is 10% of each wave's peak current; the
    # original used np.max(E_red) -- a potential, not a current -- for the
    # reduction wave, which breaks peak detection whenever the potential
    # scale differs from the current scale.
    E_pa_index, E_pa_dict = sps.find_peaks(-1*I_red, prominence=0.1*np.max(-1*I_red))
    E_pc_index, E_pc_dict = sps.find_peaks(I_ox, prominence=0.1*np.max(I_ox))
    # Convert peak indices to potentials (first reduction peak, last oxidation peak)
    E_pa = E_red[E_pa_index[0]]
    E_pc = E_ox[E_pc_index[-1]]
    # Peak separation and half-wave potential (midpoint of the two peaks)
    delta_E = np.abs(E_pa - E_pc)
    E1_2 = (E_pa + E_pc) * 0.5
    return E_pa, E_pc, delta_E, E1_2, E_pa_index, E_pc_index
if __name__ == '__main__':
    # Ask the user for the data file
    quickPopupMessage(message = 'Select a file with CV data.')
    filename = quickOpenFilename(filetypes = 'CSV files, *.csv')
    # Read the data - reduction and oxidation waves are included here
    E, I = np.genfromtxt(filename, delimiter = ',', skip_header = 1, unpack = True)
    # Get separate E and I arrays for oxidation and reduction
    E_red, I_red, E_ox, I_ox = SplitWaves(E, I)
    # Call the analysis function
    E_pa, E_pc, delta_E, E1_2, E_pa_index, E_pc_index = AnalyzeCV(E_red, I_red, E_ox, I_ox)
    # Print the values AnalyzeCV returned.  (The original re-derived the
    # printed peaks from the index arrays and used E_pc_index[0], which can
    # disagree with E_pc = E_ox[E_pc_index[-1]] when several peaks are found,
    # making the printed peaks inconsistent with the printed delta_E/E1_2.)
    print(f'E_pa = {E_pa:.3f} V')
    print(f'E_pc = {E_pc:.3f} V')
    print(f'{Delta}E = {delta_E:.3f} V, E1/2 = {E1_2:.3f} V')
    # Plot the two waves and mark the detected peaks
    fig = make_subplots()
    fig.add_scatter(x = E_red, y = I_red, name = f'I{textsub("red")}',
                    line = dict(color = 'red'))
    fig.add_scatter(x = E_ox, y = I_ox, name = f'I{textsub("ox")}',
                    line = dict(color = 'blue'))
    fig.add_scatter(x = E_red[E_pa_index], y = I_red[E_pa_index],
                    name = f'E{textsub("pa")}', mode = 'markers',
                    marker = dict(color = 'red', size = 10))
    fig.add_scatter(x = E_ox[E_pc_index], y = I_ox[E_pc_index],
                    name = f'E{textsub("pc")}', mode = 'markers',
                    marker = dict(color = 'blue', size = 10))
    # Format the plot and display
    fig.update_xaxes(title = f'{textit("E")} /V', tickformat = '0.1f')
    fig.update_yaxes(title = f'{textit("i")} /A')
    fig.update_layout(template = 'simple_white', font_family = 'arial',
                      font_size = 18, width = 3 * 300, height = 2 * 300,
                      margin = dict(b = 10, t = 30, l = 10, r = 10))
    fig.show('png')
fig.write_image('CVpeaks.png')
'''
Testing numerical derivatives by taking the derivative of a cos function
Written by: Chris Johnson and Ben Lear (authors@codechembook.com)
v1.0.0 - 250427 - initial version
'''
import numpy as np
from plotly.subplots import make_subplots
# Sample a cos function and its analytical derivative, sin, over one period
# (20 points: a coarse sampling, so differences at the edges are visible)
x = np.linspace(0, 2*np.pi, 20)
y = np.cos(x)
dy_analytical = -1 * np.sin(x)
# Compute the numerical derivative using 1st and 2nd order edges.
# edge_order only changes the one-sided formulas used at the two endpoints;
# interior points always use central differences.
dy_gradient_1 = np.gradient(y, x, edge_order = 1)
dy_gradient_2 = np.gradient(y, x, edge_order = 2)
# Plot all the curves to compare; line widths decrease so the overlapping
# curves all stay visible
fig = make_subplots()
fig.add_scatter(x = x, y = y, name = 'cos(x)', line = dict(color = 'black'), )
fig.add_scatter(x = x, y = dy_gradient_1, name = 'Gradient, order 1',
                line = dict(color = 'gray', width = 12),)
fig.add_scatter(x = x, y = dy_gradient_2, name = 'Gradient, order 2',
                line = dict(color = 'black', dash = 'solid', width = 8),)
fig.add_scatter(x = x, y = dy_analytical, name = 'd(cos(x))/dx',
                line = dict(color = 'lightgrey', dash = 'dot', width = 4), )
fig.update_yaxes(title = 'y')
fig.update_xaxes(title = 'x')
fig.update_layout(template = 'simple_white', font_size = 18,
                  legend = dict(x = 1, y = 0, xanchor = 'right'),
                  width = 3 * 300, height = 2 * 300,
                  margin = dict(b = 10, t = 30, l = 10, r = 10))
fig.show('png')
fig.write_image(format = 'png', file = 'CosDerivativeExample.png')
'''
Get E_1/2, i_pa, and i_pc for a CV experiment
Requires: CVPeakFind_refactored.py, CV data in a .csv file with
reduction first then oxidation
Written by: Chris Johnson and Ben Lear (authors@codechembook.com)
v1.0.0 - 250427 - initial version
'''
import numpy as np
from plotly.subplots import make_subplots
from lmfit.models import LinearModel
from codechembook.quickTools import quickOpenFilename, importFromPy, quickPopupMessage
from codechembook.symbols.greek import Delta
from codechembook.symbols.typesettingHTML import textsub, textit
importFromPy('CVPeakFind_refactored.py', 'SplitWaves', 'AnalyzeCV')
# Get the capacative contribution to the current
def getCapCharge(I, E, E_peak):
    '''
    Compute the capacitive charging component of the i vs. E wave.

    The second derivative of the current is scanned with a moving window for
    the first contiguous stretch of low standard deviation (a "flat" region
    before the faradaic peak); that stretch is fit to a straight line, which
    is extrapolated over the whole wave as the capacitive baseline.

    REQUIRED PARAMS:
        I (ndarray): the current data points
        E (ndarray): the potential data points
        E_peak (int): the INDEX of the peak in the wave (despite the name,
                      this is an array index, not a potential -- callers pass
                      E_pa_index[0] / E_pc_index[0])

    RETURNS:
        (ndarray): cap, the linearly extrapolated capacitive current at each E
                   (the original docstring listed five return values; only cap
                   is, and ever was, returned)
    '''
    # First and second derivatives of the current with respect to potential
    dI = np.gradient(I, E)
    d2I = np.gradient(dI, E)
    # Standard deviation of the second derivative in a moving window, evaluated
    # from data point 10 up to the peak index (std_d2I[j] corresponds to data
    # point j + 10)
    window = 5
    std_d2I = np.array([np.std(d2I[i-window:i+window]) for i in np.arange(10, E_peak)])
    # Offsets (into std_d2I) where the standard deviation is below twice the minimum
    std_valid = np.array([i for i in range(len(std_d2I)) if std_d2I[i] < 2*np.min(std_d2I)])
    # Keep only the first contiguous run of low-std points: subtract the
    # offsets we would expect if every point were contiguous and keep the
    # entries where the difference is zero
    std_valid_shift = std_valid - np.arange(np.min(std_valid),
                                            np.min(std_valid) + len(std_valid))
    deriv_indicies = [val for val, test in zip(std_valid, std_valid_shift) if test == 0]
    # Convert offsets within std_d2I back to indices into the data arrays.
    # (The original indexed E and I with the raw offsets, shifting the fitted
    # baseline region 10 points away from the flat region it had detected.)
    fit_idx = np.asarray(deriv_indicies) + 10
    # Ordinary least-squares straight line through the flat region.  np.polyfit
    # computes the same unweighted linear fit the lmfit LinearModel did,
    # without the extra dependency.
    slope, intercept = np.polyfit(E[fit_idx], I[fit_idx], 1)
    # Extrapolate the capacitive charging component over the full wave
    cap = slope * E + intercept
    return cap
# Find the data file
quickPopupMessage(message = 'Select a file with CV data.')
filename = quickOpenFilename(filetypes = 'CSV files, *.csv')
# Read the data - reduction and oxidation waves are included here
E, I = np.genfromtxt(filename, delimiter = ',', skip_header = 1, unpack = True)
# Get separate E and I arrays for oxidation and reduction
E_red, I_red, E_ox, I_ox = SplitWaves(E, I)
# call the analysis function
E_pa, E_pc, delta_E, E1_2, E_pa_index, E_pc_index = AnalyzeCV(E_red, I_red, E_ox, I_ox)
# Get the capacitive currents for both waves.
# NOTE(review): getCapCharge's third argument is used as an array INDEX, not a
# potential, despite its parameter name.  Also, AnalyzeCV computed E_pc from
# E_pc_index[-1] while [0] is used here -- confirm which peak is intended.
cap_red = getCapCharge(I_red, E_red, E_pa_index[0])
cap_ox = getCapCharge(I_ox, E_ox, E_pc_index[0])
# Print the peak potentials and the baseline-corrected (faradaic) peak currents
print(f'Epa = {E_red[E_pa_index[0]]:.3f} V, ipa {I_red[E_pa_index[0]] - cap_red[E_pa_index[0]]:9.3e} A')
print(f'Epc = {E_ox[E_pc_index[0]]:.3f} V, ipc {I_ox[E_pc_index[0]] - cap_ox[E_pc_index[0]]:9.3e} A')
print(f'{Delta}E = {np.abs(E_pa - E_pc):.3f} V, E1/2 = {(E_pa + E_pc)*.5:.3f} V')
# Plot the results
fig = make_subplots()
fig.add_scatter(x = E_red, y = I_red, name = f'I{textsub("red")}',
                line = dict(color = 'red'))
fig.add_scatter(x = E_ox, y = I_ox, name = f'I{textsub("ox")}',
                line = dict(color = 'blue'))
# NOTE(review): E_red[E_pa_index] is already an array; the surrounding [ ]
# wraps it in a one-element list (the Chapter-13 peak-finding script passes it
# bare) -- verify the peak markers render as intended.
fig.add_scatter(x = [E_red[E_pa_index]], y = [I_red[E_pa_index]],
                name = f'E{textsub("pa")}', mode = 'markers',
                marker = dict(color = 'red', size = 10))
fig.add_scatter(x = [E_ox[E_pc_index]], y = [I_ox[E_pc_index]],
                name = f'E{textsub("pc")}', mode = 'markers',
                marker = dict(color = 'blue', size = 10))
# Dashed lines show the extrapolated capacitive baselines
fig.add_scatter(x = E_red, y = cap_red,
                showlegend = False, line = dict(color = 'red', dash = 'dash'))
fig.add_scatter(x = E_ox, y = cap_ox,
                showlegend = False, line = dict(color = 'blue', dash = 'dash'))
# Format the plot and display
fig.update_xaxes(title = f'{textit("E")} /V', tickformat = '0.1f')
fig.update_yaxes(title = f'{textit("i")} /A')
fig.update_layout(template = 'simple_white', font_family = 'arial',
                  font_size = 18, width = 3 * 300, height = 2 * 300,
                  margin = dict(b = 10, t = 30, l = 10, r = 10))
fig.show('png')
fig.write_image('CVpeaks.png')
Solutions to Exercises
Targeted exercises
Refactoring code to turn it into functions you can reuse
Exercise 0
Refactor the final code from Chapter 1. Create separate functions to handle the printing output, the calcPlateVols() functionality, and defining the geometry and concentrations of the well plate. The latter should take as arguments: the number of rows, the number of columns and the starting and ending concentrations and ionic strengths.
import numpy as np
# Function to generate well plate concentration matrices
def generate_wellplate_conditions(rows, cols, conc_cat, conc_HCl_start, conc_HCl_end, I_start, I_end):
    """Build the catalyst, HCl, and ionic-strength matrices for a well plate.

    The catalyst concentration is uniform across the plate, HCl varies
    linearly across the columns, and ionic strength varies linearly down
    the rows.  Each returned array has shape (rows, cols).
    """
    # Uniform catalyst concentration everywhere on the plate
    cat = np.full((rows, cols), conc_cat, dtype=float)
    # HCl gradient along the columns, repeated on every row
    MHCl = np.tile(np.linspace(conc_HCl_start, conc_HCl_end, cols), (rows, 1))
    # Ionic-strength gradient down the rows, repeated in every column
    ionic = np.tile(np.linspace(I_start, I_end, rows)[:, np.newaxis], (1, cols))
    return cat, MHCl, ionic
# Function to calculate required stock volumes
def calculate_volumes(conc_cat, conc_HCl, I, vol_final=1.0):
    """Return the stock volumes (catalyst, HCl, NaCl, water) needed per well.

    Standard dilution: V_stock = C_target / C_stock * V_final.  NaCl supplies
    whatever ionic strength the HCl does not, and water fills to vol_final.
    Accepts scalars or elementwise arrays; volumes are in mL.
    """
    # Stock concentrations (M)
    STOCK_CAT = 0.1
    STOCK_HCL = 6
    STOCK_NACL = 3
    # Volume of each stock required to hit the target concentrations (mL)
    vol_cat = conc_cat / STOCK_CAT * vol_final
    vol_HCl = conc_HCl / STOCK_HCL * vol_final
    vol_NaCl = (I - conc_HCl) / STOCK_NACL * vol_final
    # Water makes up the remainder of the final volume
    vol_water = vol_final - vol_cat - vol_HCl - vol_NaCl
    return vol_cat, vol_HCl, vol_NaCl, vol_water
# Function to print volumes in a readable format
def print_volumes(vol_cat, vol_HCl, vol_NaCl, vol_water):
    """Print each reagent's volume matrix under a labeled header."""
    labeled = (("[ ] Catalyst solution (mL):\n", vol_cat),
               ("[ ] HCl (mL):\n", vol_HCl),
               ("[ ] NaCl (mL):\n", vol_NaCl),
               ("[ ] Water (mL):\n", vol_water))
    for header, volumes in labeled:
        print(header, volumes)
# === Main usage ===
# Define geometry and experimental ranges (a 4x6 plate; concentrations in M)
rows, cols = 4, 6
conc_cat = 0.01
conc_HCl_start, conc_HCl_end = 0.0, 0.01
I_start, I_end = 0.02, 0.2
# Generate the per-well target-concentration matrices
cat, MHCl, ionic = generate_wellplate_conditions(rows, cols, conc_cat, conc_HCl_start, conc_HCl_end, I_start, I_end)
# Calculate stock volumes per well (vol_final defaults to 1.0 mL)
vol_cat, vol_HCl, vol_NaCl, vol_water = calculate_volumes(cat, MHCl, ionic)
# Output
print_volumes(vol_cat, vol_HCl, vol_NaCl, vol_water)
Exercise 1
Refactor the final code from Chapter 9. Produce a function that will automatically background subtract data passed to it. This is the refactor function.
def background_subtract(wavenumber, absorption, integration_limits):
    """Perform baseline subtraction using a linear fit between integration limits.

    Returns (corrected, baseline): the baseline-subtracted absorption and the
    linear baseline itself, both evaluated over the full wavenumber range.
    """
    # Find the indices of the data points nearest to each integration limit
    xmin_index = np.argmin(abs(wavenumber - integration_limits[0]))
    xmax_index = np.argmin(abs(wavenumber - integration_limits[1]))
    # Slope and intercept of the straight line connecting the two endpoints
    m = (absorption[xmax_index] - absorption[xmin_index]) / (wavenumber[xmax_index] - wavenumber[xmin_index])
    b = absorption[xmin_index] - m * wavenumber[xmin_index]
    # Evaluate the baseline over the whole range and subtract it
    baseline = m * wavenumber + b
    corrected = absorption - baseline
    # Return both baseline-corrected data and the baseline itself
    return corrected, baseline
You can use it this way:
# Perform background subtraction using helper function
spec['back corr'], _ = background_subtract(spec['wavenumber'], spec['absorption'], integration_limits)
Computing derivatives using numpy.gradient
Exercise 2
For each of the following functions, determine the accuracy of a numerical derivative. Create data with an $x$-axis from 0 to $2\pi$ with 10 data points. Make a figure with two subplots, where the top plot shows the function, its analytical derivative, and its numerical derivative using the forward-backward method, and the bottom plot shows the difference between the numerical and analytical solutions.
- sine
- Gaussian
- Lorentzian
import numpy as np
import plotly.subplots as sp
import plotly.graph_objects as go
# Set up x-axis
x = np.linspace(0, 2*np.pi, 10)
dx = x[1] - x[0]
# Forward-backward finite difference derivative
def finite_diff(y, dx):
    """Differentiate y (uniform spacing dx): central differences in the
    interior, one-sided differences at the two ends."""
    deriv = np.empty_like(y)
    deriv[0] = (y[1] - y[0]) / dx              # forward difference at the left edge
    deriv[-1] = (y[-1] - y[-2]) / dx           # backward difference at the right edge
    deriv[1:-1] = (y[2:] - y[:-2]) / (2 * dx)  # central differences elsewhere
    return deriv
# Function definitions and derivatives
functions = {
'sin(x)': {
'f': np.sin(x),
'dfdx': np.cos(x)
},
'Gaussian': {
'f': np.exp(-x**2),
'dfdx': -2*x * np.exp(-x**2)
},
    'Lorentzian': {
        'f': 1 / (1 + x**2),
        'dfdx': -2 * x / (1 + x**2)**2
    },
}
[NOTE: the remainder of this solution (the plotting of the two subplots) appears to have been lost in extraction.]
Exercise 3
Starting with your code from Exercise 2 Create a plot of the root-mean-squared deviation between the numerical and analytical solutions vs. number of data points for data with 10, 20, 30, 40, and 50 points. What do you notice?
import numpy as np
import plotly.graph_objects as go
# Forward-backward finite difference
def finite_diff(y, dx):
    """Numerical derivative of y: central in the interior, one-sided at the edges."""
    left_edge = (y[1] - y[0]) / dx
    right_edge = (y[-1] - y[-2]) / dx
    middle = (y[2:] - y[:-2]) / (2 * dx)
    dydx = np.empty_like(y)
    dydx[0], dydx[-1] = left_edge, right_edge
    dydx[1:-1] = middle
    return dydx
# RMSD function
def rmsd(y1, y2):
    """Return the root-mean-squared deviation between y1 and y2."""
    squared_error = (y1 - y2) ** 2
    return np.sqrt(np.mean(squared_error))
# Functions and their derivatives
def sin_fn(x): return np.sin(x)                      # test function: sine
def sin_deriv(x): return np.cos(x)                   # d/dx sin(x)
def gauss_fn(x): return np.exp(-x**2)                # test function: Gaussian
def gauss_deriv(x): return -2 * x * np.exp(-x**2)    # d/dx exp(-x^2)
def lorentz_fn(x): return 1 / (1 + x**2)             # test function: Lorentzian
def lorentz_deriv(x): return -2 * x / (1 + x**2)**2  # d/dx 1/(1+x^2)
# Test point counts
N_vals = [10, 20, 30, 40, 50]
# Store RMSD results
results = {
'sin(x)': [],
'Gaussian': [],
'Lorentzian': []
}
for N in N_vals:
    x = np.linspace(0, 2 * np.pi, N)
    dx = x[1] - x[0]
    # The same computation is applied to each test function: numerical
    # derivative via finite_diff, analytical derivative, then the RMSD
    # between them.  (This replaces three copy-pasted blocks with one loop,
    # matching the dict-driven pattern used in Exercise 4; the append order
    # per N -- sin, Gaussian, Lorentzian -- is unchanged.)
    for label, fn, deriv in (('sin(x)', sin_fn, sin_deriv),
                             ('Gaussian', gauss_fn, gauss_deriv),
                             ('Lorentzian', lorentz_fn, lorentz_deriv)):
        y = fn(x)
        results[label].append(rmsd(finite_diff(y, dx), deriv(x)))
# Plot RMSD vs N
fig = go.Figure()
for label, errors in results.items():
fig.add_scatter(x=N_vals, y=errors, mode='lines+markers', name=label)
fig.update_layout(
title="RMSD of Numerical vs Analytical Derivative vs Number of Points",
xaxis_title="Number of Points",
yaxis_title="RMSD",
yaxis_type='log'
)
fig.show()
-
RMSD decreases as the number of points increases — finer sampling leads to more accurate derivatives.
-
sin(x) tends to converge the fastest due to its smooth, periodic nature.
-
Gaussian and Lorentzian converge more slowly, especially the Lorentzian, which has sharp curvature and longer tails.
-
The improvement follows roughly power-law decay, visible as a straight line on the log scale — consistent with first-order accuracy of the forward-backward finite difference method.
Exercise 4
Redo Exercise 3 using only forward derivatives and compare the accuracy of the two methods.
import numpy as np
import plotly.graph_objects as go
# Derivative methods
def forward_diff(y, dx):
    """Forward-difference derivative of y sampled at uniform spacing dx.

    The last point has no forward neighbour, so it repeats the previous
    derivative value to keep the output the same length as y.
    """
    dydx = np.empty_like(y)
    dydx[:-1] = np.diff(y) / dx
    dydx[-1] = dydx[-2]
    return dydx
def forward_backward_diff(y, dx):
    """Central-difference derivative with one-sided differences at the edges."""
    first = (y[1] - y[0]) / dx
    last = (y[-1] - y[-2]) / dx
    interior = (y[2:] - y[:-2]) / (2 * dx)
    dydx = np.empty_like(y)
    dydx[0] = first
    dydx[1:-1] = interior
    dydx[-1] = last
    return dydx
# RMSD
def rmsd(y1, y2):
    """Root-mean-squared deviation between two equal-length arrays."""
    return np.sqrt(np.mean((y1 - y2)**2))
# Functions and derivatives
def sin_fn(x): return np.sin(x)                      # sine and its derivative
def sin_deriv(x): return np.cos(x)
def gauss_fn(x): return np.exp(-x**2)                # Gaussian and its derivative
def gauss_deriv(x): return -2 * x * np.exp(-x**2)
def lorentz_fn(x): return 1 / (1 + x**2)             # Lorentzian and its derivative
def lorentz_deriv(x): return -2 * x / (1 + x**2)**2
N_vals = [10, 20, 30, 40, 50]
funcs = {
'sin(x)': (sin_fn, sin_deriv),
'Gaussian': (gauss_fn, gauss_deriv),
'Lorentzian': (lorentz_fn, lorentz_deriv),
}
# Results
results_fwd = {name: [] for name in funcs}
results_fb = {name: [] for name in funcs}
# Compute RMSD for both methods
for N in N_vals:
x = np.linspace(0, 2 * np.pi, N)
dx = x[1] - x[0]
for name, (f, df) in funcs.items():
y = f(x)
true = df(x)
num_fwd = forward_diff(y, dx)
num_fb = forward_backward_diff(y, dx)
results_fwd[name].append(rmsd(num_fwd, true))
results_fb[name].append(rmsd(num_fb, true))
# Plotting
fig = go.Figure()
for name in funcs:
fig.add_scatter(x=N_vals, y=results_fwd[name], mode='lines+markers',
name=f'{name} (Forward)')
fig.add_scatter(x=N_vals, y=results_fb[name], mode='lines+markers',
name=f'{name} (Forward-Backward)')
fig.update_layout(
title="RMSD of Numerical Derivatives vs Number of Points",
xaxis_title="Number of Points",
yaxis_title="RMSD (log scale)",
yaxis_type='log'
)
fig.show()
-
Forward-backward (centered) difference is more accurate, especially for smoother functions like
sin(x) and the Gaussian.
-
The forward method converges more slowly and has consistently higher RMSD.
-
The advantage of central differences becomes more significant with fewer points (coarser sampling).
Importing objects from Python files in arbitrary locations
Exercise 5
Return to the final code for Chapter 7, and replace the block that imported code from an external script with code that uses importFromPy().
'''
A program to fit a titration to the Henderson-Hasselbalch equation
Requires: pH_response from TitrationModel.py
          .csv files with col 1 as wavelength and col 2 as intensity
Written by: Ben Lear and Chris Johnson (authors@codechembook.com)
v1.0.0 - 250214 - initial version
'''
import numpy as np
from pathlib import Path
from lmfit import Model
from codechembook.quickPlots import plotFit
import codechembook.symbols as cs
from codechembook.quickTools import quickOpenFilename, quickPopupMessage, importFromPy
import os  # NOTE(review): os is never used below -- candidate for removal
# Ask the user to specify a file with the data and read it
exp_eqs, exp_pHs = np.genfromtxt(quickOpenFilename(), delimiter = ',', skip_header = 1, unpack = True)
# Bring the model function into this namespace from an arbitrary location
importFromPy("TitrationModel.py", "pH_response")
# Define a new lmfit model using the pH_response function
pH_model = Model(pH_response, independent_vars=['eqs']) # set up the model with eqs as the 'x' axis
# Set up the fit parameter and non-adjustable parameter for initial amount of acid
pH_params = pH_model.make_params() # make a parameter object
pH_params.add('pKa', value = np.mean(exp_pHs)) # initial guess: the mean of the measured pHs
base_i = 0.05 # the initial amount of acid
# Fit the model to the data and store the results
pH_fit = pH_model.fit(data = exp_pHs, eqs = exp_eqs, params = pH_params, base_i = base_i)
# Create a figure for the fit result but don't show it yet
fig = plotFit(pH_fit, residual = True, xlabel = 'equivalents added', ylabel = 'pH', output = None)
# Add a horizontal line to highlight the pKa as determined by the fit
fig.add_scatter(x = [min(exp_eqs), max(exp_eqs)],
                y = [pH_fit.params['pKa'].value, pH_fit.params['pKa'].value],
                mode = 'lines', showlegend=False,
                line = dict(color = 'gray'))
# Add an annotation containing the best fitting pKa and its uncertainty.
# Double quotes are used INSIDE the f-string expressions: reusing the outer
# single quote there is a SyntaxError on Python < 3.12 (PEP 701).
fig.add_annotation(x = max(exp_eqs), y = pH_fit.params['pKa'].value,
                   xanchor = 'right', yanchor = 'bottom',
                   text = f'pKa = {pH_fit.params["pKa"].value:.3f} {cs.math.plusminus} {pH_fit.params["pKa"].stderr:.3f}',
                   showarrow = False)
fig.show('png')
Exercise 6
Using importFromPy() get the $x$ and $y$ data from the final code in Chapter 2. Then use importFromPy() to get the final extinction coefficient from the code in Chapter 5. Then make a plot of the data, but rescale the $y$ values to be in terms of concentration, rather than absorbance. Label the axes appropriately.
from codechembook.quickTools import importFromPy
# get the data from Chapter 2, take in as the figure
# don't forget you will be prompted to find the data to plot
importFromPy("PlotCatalystUVvis.py", "UVvis")
# get the extinction coefficity from chapter 5
# don't forget you will be prompted to find the folder to plot and the wavelength of interest
importFromPy("ExtinctionCalc.py", "result")
#plot the data from the UVvis figure, but with the y-rescaled
from codechembook.quickPlots import quickScatter
quickScatter(UVvis.data[0].x, UVvis.data[0].y/result.params["slope"].value, xlabel = "wavelength /nm", ylabel = "concentration /M")
IMAGE
Comprehensive exercises
Exercise 7
Derivatives tend to amplify noise. Using the X-ray powder diffraction data from the website for this book, compare the results when computing the derivative first, then performing Savitzky-Golay filtering to the opposite. Which one produces more noise? You should do this in two steps, separately computing derivatives and smoothing, but note (for future use) that scipy.savgol_filter() can give you derivatives automatically with the deriv keyword.
import numpy as np
from scipy.signal import savgol_filter
from codechembook.quickTools import quickOpenFilename
from plotly.subplots import make_subplots
# Ask for the XRD data file and read its two columns (angle, intensity)
xrd_file = quickOpenFilename()
xrd_x, xrd_y = np.genfromtxt(xrd_file, delimiter=",", unpack = True)
# Raw data goes in the top subplot; both derivative orderings in the bottom one
fig = make_subplots(rows = 2, cols = 1)
fig.add_scatter(x = xrd_x, y = xrd_y, name = "experimental")
# compute the derivative, and then apply SG filtering (window 5, polyorder 3)
xrd_dy2 = np.gradient(xrd_y, xrd_x, edge_order = 2)
xrd_dy2_sg = savgol_filter(xrd_dy2, 5, 3)
fig.add_scatter(x = xrd_x, y = xrd_dy2_sg, name = "der then smooth", row = 2, col = 1)
# compute the sg filter (same window/order), and then take the derivative
xrd_sg = savgol_filter(xrd_y, 5, 3)
xrd_sg_dy2 = np.gradient(xrd_sg, xrd_x, edge_order = 2)
fig.add_scatter(x = xrd_x, y = xrd_sg_dy2, name = "smooth, then der", row = 2, col = 1)
fig.show("browser")Exercise 8
Write your own peak finding algorithm using derivatives. Using the file “HCl-IR.csv,” from the website for this book, which contains the FTIR spectrum of HCl, create a code that finds peaks without using scipy.find_peaks(). Remember that at a peak, the first derivative is zero and the second derivative is negative.
- Read the file and plot the data, its first derivative, and its second derivative. Note that the first and second derivatives may be much smaller or larger than the data, so you may want to apply a scaling factor to each such that they are all about the same height as the data. Zoom in on a smaller peak to confirm that the derivatives look as expected.
- A peak should be a maximum in the signal, a negative-going zero crossing in the first derivative, and a minimum for the second derivative. Since noise looks like peaks to a computer, you will also need to have a “threshold,” or a minimum intensity in the spectrum that distinguishes a peak from noise. Any zero crossing at a point where the signal is less than the threshold should be ignored.
The first derivative is unlikely to be precisely zero anywhere. It is much easier to find the points just before and just after the zero crossing of the first derivative and pick as the peak whichever of the two points is higher in the signal. Plot the peaks you picked on top of the data using a scatter plot. Zoom in on a few peaks to confirm that your code works correctly.
We have also provided three other files: another FTIR spectrum “CO2-IR.csv,” an NMR spectrum “NMR.csv,” and a powder diffraction pattern “KBr.csv.” Using this same code (changing only the thresholds and scaling factors for the derivatives), find the peaks. Note that the diffraction data is noisy — comment on how this affects your peak picker. What could you do about it?
- A good trick can be to “weight” the derivatives to make it easier to tell the real peaks from the noise ones. This can be accomplished by multiplying the derivative by the signal, so that the amplitude of the derivative at each peak is enhanced. Implement this and see how this impacts your results.
- You could get a more accurate estimate of the peak by explicitly determining the zero crossing. For the two points in your derivative data that are just above and just below zero, compute x value at which the derivative crosses zero. You can do this by computing the slope, $m = (y_2 - y_1)/(x_2 - x_1)$, with point 2 as the point just below zero and point 1 the point just above zero. Then, solve the same equation for $x_2$ (your zero crossing) where $y_2 = 0$ and $m$ is the slope you just computed. Implement this and comment on how it impacts your results.
import numpy as np
from codechembook.quickPlots import quickScatter, plotFit
from codechembook.symbols import chem
from codechembook.quickTools import quickReadCSV
from lmfit.models import GaussianModel, ConstantModel
x, y = quickReadCSV()  # spectrum: x = wavenumber, y = intensity
# First and second numerical derivatives of the spectrum
dydx = np.gradient(y, x)
d2ydx2 = np.gradient(dydx, x)
# Overlay the (scaled-down) derivatives and the signal-weighted derivative so
# everything is roughly the same height as the data
fig = quickScatter(x, y, mode = 'lines', output = 'None', name = 'Spectrum')
fig.add_scatter(x = x, y = dydx / 10, mode = 'lines', name = 'Derivative')
fig.add_scatter(x = x, y = d2ydx2 / 100, mode = 'lines', name = '2nd Derivative')
fig.add_scatter(x = x, y = dydx * y, mode = 'lines', name = 'Weighted Derivative')
fig.show('browser')
# Peak picking without find_peaks: a negative-going zero crossing of the first
# derivative at a point whose intensity exceeds the noise threshold
threshold = 0.0015
peaks = []
for i in range(len(dydx)-1):
    if dydx[i] >= 0 and dydx[i+1] < 0 and y[i] > threshold:
        # keep whichever point flanking the zero crossing is higher in the signal
        if y[i] >= y[i+1]:
            peaks.append(i)
        else:
            peaks.append(i+1)
# Replot the spectrum with markers on the picked peaks
fig2 = quickScatter(x, y, mode = 'lines', output = 'None', name = 'Spectrum')
fig2.add_scatter(x = x[peaks], y = y[peaks], mode = 'markers', name = 'Peaks')
fig2.show('browser')
# Fit the picked peaks: a constant baseline plus one Gaussian per picked peak
model = ConstantModel()
params = model.make_params()
for i in peaks:
    prefix = f'G{round(x[i])}'  # unique prefix derived from the peak position
    temp_model = GaussianModel(prefix = prefix)
    pars = temp_model.make_params(amplitude = y[i], center = x[i], sigma = 0.25)
    model = model + temp_model
    params.update(pars)
result = model.fit(y, x = x, params = params)
print(result.fit_report())
plotFit(result, residual = True, xlabel = f"Wavenumber ({chem.wavenumber})", ylabel = 'Intensity', output = 'png')
IMAGE
Exercise 9
Sometimes spectroscopists find it easier to determine the number of peaks in a complex, overlapping band, by taking the derivative of the spectrum. To figure out why, and apply this to generate initial guesses for the code at the end of Chapter 6. Hints: You can answer the fundamental question by making your own overlapping peaks and taking their derivative. Then, ask yourself what are the different ways to find peaks!
'''
Fit data to multiple gaussian components and a linear background
Requires: a .csv file with col 1 as wavelength and col 2 as intensity
Written by: Ben Lear and Chris Johnson (authors@codechembook.com)
v1.0.0 - 250207 - initial version
'''
import numpy as np
from lmfit.models import LinearModel, GaussianModel
from codechembook.quickPlots import plotFit
import codechembook.symbols as cs
from codechembook.quickTools import quickOpenFilename, quickPopupMessage
from codechembook.quickPlots import quickScatter
from plotly.subplots import make_subplots
from scipy.signal import savgol_filter, find_peaks
# Ask the user for the filename containing the data to analyze
quickPopupMessage(message = 'Select the file 0.001.csv')
file = quickOpenFilename(filetypes = 'CSV Files, *.csv')
# Read the file and unpack into arrays
wavelength, absorbance = np.genfromtxt(file, skip_header=1, unpack = True, delimiter=',')
# Set the upper and lower wavelength limits of the region of the spectrum to analyze
lowerlim, upperlim = 450, 750
# Slice data to only include the region of interest
trimmed_wavelength = wavelength[(wavelength >= lowerlim) & (wavelength < upperlim)]
trimmed_absorbance = absorbance[(wavelength >= lowerlim) & (wavelength < upperlim)]
# here is where we use derivatives and smoothing to try to identify peaks
derivative = np.gradient(trimmed_absorbance, trimmed_wavelength, edge_order=2)
smoothed_derivative = savgol_filter(derivative, 20,3)
derivative2 = np.gradient(smoothed_derivative, trimmed_wavelength, edge_order=2)
smoothed_derivative2 = savgol_filter(derivative2, 20,3)
# this part just lets us see what is happening
der_fig = make_subplots()
der_fig.add_scatter(x = trimmed_wavelength, y = trimmed_absorbance/np.max(trimmed_absorbance), name = "experimental")
der_fig.add_scatter(x = trimmed_wavelength, y = smoothed_derivative/np.max(smoothed_derivative), name = "derivative")
der_fig.add_scatter(x = trimmed_wavelength, y = smoothed_derivative2/np.max(smoothed_derivative2), name = "2nd derivative")
der_fig.show("png")
# now try to extract 3 peaks, by lowering the prominence threshold until we find 3
i = 1
peak_indexes = []
while len(peak_indexes) != 3 and i > 0:
    peak_indexes = find_peaks(-1*smoothed_derivative2/np.max(smoothed_derivative2), prominence = i)[0]
    i = i - 0.01
if len(peak_indexes) < 3:
    # Guard: the original indexed peak_indexes[0..2] unconditionally, which
    # raises a bare IndexError if the search ends with fewer than 3 peaks
    raise RuntimeError('Could not locate 3 peaks in the smoothed 2nd derivative')
position_guesses = [trimmed_wavelength[peak_indexes[0]], trimmed_wavelength[peak_indexes[1]], trimmed_wavelength[peak_indexes[2]]]
# Construct a composite model and include initial guesses
final_mod = LinearModel(prefix='lin_') # start with a linear model and add more later
pars = final_mod.guess(trimmed_absorbance, x=trimmed_wavelength) # get guesses for linear coefficients
c_guesses = position_guesses # initial guesses for centers
s_guess = 10 # initial guess for widths
a_guess = 20 # initial guess for amplitudes
for i, c in enumerate(c_guesses): # loop through each peak to add corresponding gaussian component
    gauss = GaussianModel(prefix=f'g{i+1}_') # create temporary gaussian model
    pars.update(gauss.make_params(center=dict(value=c), # set initial guesses for parameters
                                  amplitude=dict(value=a_guess, min = 0),
                                  sigma=dict(value=s_guess, min = 0, max = 25)))
    final_mod = final_mod + gauss # add each peak to the overall model
# Fit the model to the data and store the results
result = final_mod.fit(trimmed_absorbance, pars, x=trimmed_wavelength)
# Create a plot of the fit results but don't show it yet
plot = plotFit(result, residual = True, components = True, xlabel = 'wavelength /nm', ylabel = 'intensity', output = None)
# Add best fitting value for the center of each gaussian component as annotations.
# Double quotes are used inside the nested f-string expressions: reusing the
# outer single quote there is a SyntaxError on Python < 3.12 (PEP 701).
for i in range(1, len(c_guesses)+1): # loop through components and add annotations with centers
    plot.add_annotation(text = f'{result.params[f"g{i}_center"].value:.1f} {cs.math.plusminus} {result.params[f"g{i}_center"].stderr:.1f}',
                        x = result.params[f"g{i}_center"].value,
                        y = i*.04 + result.params[f"g{i}_amplitude"].value / (result.params[f"g{i}_sigma"].value * np.sqrt(2*np.pi)),
                        showarrow = False)
plot.show('png') # show the final plot