Source code for DiadFit.diads

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import patches
import lmfit
from lmfit.models import GaussianModel, VoigtModel, LinearModel, ConstantModel, PseudoVoigtModel, Pearson4Model
from scipy.signal import find_peaks
from scipy.signal.windows import gaussian


import os
import re
from os import listdir
from os.path import isfile, join
from DiadFit.importing_data_files import *
from typing import Tuple, Optional
from dataclasses import dataclass
import matplotlib.patches as patches
import warnings as w
from tqdm import tqdm
from scipy.integrate import trapezoid
from scipy.integrate import simpson
from scipy.interpolate import interp1d


# Allowed models

allowed_models = ["VoigtModel", "PseudoVoigtModel", "Pearson4Model", "SkewedVoigtModel"]

# For debuggin

#import warnings
#warnings.simplefilter('error')

encode="ISO-8859-1"

[docs] def plot_diad(*,path=None, filename=None, filetype='Witec_ASCII', Spectra_x=None, Spectra_y=None): """This function makes a plot of the spectra for a specific file to allow visual inspectoin Parameters ---------------- Either path: str Folder where spectra is stored filename: str Filename of specific spectra file of interest filetype: str choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' Or Spectra_x: np.array of x coordinates Spectra_y: np.array of y coordinates Returns plots figure with common peaks overlain """ if 'CRR' in filename: filetype='headless_txt' if Spectra_x is None: Spectra_df=get_data(path=path, filename=filename, filetype=filetype) Spectra=np.array(Spectra_df) Spectra_x=Spectra[:, 0] Spectra_y=Spectra[:, 1] fig, (ax1) = plt.subplots(1, 1, figsize=(4,3)) miny=np.min(Spectra_y) maxy=np.max(Spectra_y) ax1.plot([1090, 1090], [miny, maxy], ':k', label='Magnesite') ax1.plot([1131, 1131], [miny, maxy], '-', alpha=0.5,color='grey', label='Anhydrite/Mg-Sulfate') #ax1.plot([1136, 1136], [miny, maxy], '-', color='grey', label='Mg-Sulfate') ax1.plot([1151, 1151], [miny, maxy], ':c', label='SO2') ax1.plot([1286, 1286], [miny, maxy], '-g', alpha=0.5,label='Diad1') ax1.plot([1389, 1389], [miny, maxy], '-m', alpha=0.5, label='Diad2') ax1.legend() ax1.plot(Spectra_x, Spectra_y, '-r') ax1.set_xlabel('Wavenumber (cm-1)') ax1.set_ylabel('Intensity')
[docs] @dataclass class diad_id_config: # Thresholds for Scipy find peaks height: float = 1 width: float=0.5 prominence: float=50 # to plot or not to plot plot_figure: bool = True # Exclude a range, e.g. cosmic rays exclude_range1: Optional [Tuple[float, float]] = None exclude_range2: Optional [Tuple[float, float]] = None # Approximate diad position approx_diad2_pos: Tuple[float, float]=(1379, 1395) approx_diad1_pos: Tuple[float, float]=(1275, 1290) # Background positions for prominence LH_bck_diad1=[1180, 1220] LH_bck_diad2=[1330, 1350] RH_bck_diad1=[1330, 1350] RH_bck_diad2=[1450, 1470] Diad_window_width=30 Diad2_window: Tuple[float, float]=(approx_diad2_pos[0]-Diad_window_width, approx_diad2_pos[1]+Diad_window_width) Diad1_window: Tuple[float, float]=(approx_diad1_pos[0]-Diad_window_width, approx_diad1_pos[1]) approx_diad2_pos_3peaks: Tuple[float, float]=(1379, 1395, 1379-17)
[docs] def identify_diad_peaks(*, config: diad_id_config=diad_id_config(), path=None, filename=None, diad_array=None, filetype='Witec_ASCII', plot_figure=True): """ This function fits a spline to the spectral data. It then uses Scipy find peaks to identify the diad, HB and C13 peaks. It outputs parameters such as peak positions, peak prominences, and peak height ratios that can be very useful when splitting data into groups. It uses information stored in diad_id_config for most parameters Parameters ------------- config: dataclass This is diad_id_config, which you should edit before you feed into the function to adjust peak finding parameters. E.g. to change prominence of peaks found by scipy, do config=pf.diad_id_config(prominence=12), then input into this function Things that can be adjusted in this config file height: 1 (threshold height for Scipy find peaks) width: 0.5 (threshold width for Scipy find peaks) prominence: 50 (threshold prominence for scipy find peaks) exclude_range1: None (default ) or list - excludes a certain region of the spectra (e.g, [1200, 1250]) exclude_range2: None (default ) or list - excludes a certain region of the spectra (e.g, [1300, 1310]) Diad_window_width: 30 - Window to the left of Diad1 and to the right of Diad2 to look fo rdiad + hotbands. approx_diad2_pos: (1379, 1395) - Region for code to look for diad2 (+-Diad_window_width). approx_diad1_pos: (1275, 1290) - Region for code to look for diad1 (+-Diad_window_width). LH_bck_diad1, LH_bck_diad2, RH_bck_diad1, RH_bck_diad2: Positions either side of peaks to calculate prominences (e.g. LH_bck_diad1=[1180, 1220]) path: str Path where spectra is stored filename: str Specific name of file being plotted filetype: str choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' plot_figure: bool if True, plots a figure of peaks, and the identified peaks that Scipy found Returns ------------- pd.DataFrame Dataframe of approximate peak positions, prominences, etc. np.array of x and y coordinates of spectra figure if plot_figure is True """ if diad_array is None: Diad_df=get_data(path=path, filename=filename, filetype=filetype) Diad=np.array(Diad_df) else: Diad=diad_array # First lets use find peaks y_in=Diad[:, 1] x_in=Diad[:, 0] spec_res=np.abs(x_in[1]-x_in[0]) # Now lets fit a cubic spline to smoooth it all out a bit # Get x and y ready to make a cubic spline through data # Fit a cubic spline f2 = interp1d(x_in, y_in, kind='cubic') x_new=np.linspace(np.min(x_in), np.max(x_in), 100000) y_cub=f2(x_new) y=y_cub x=x_new # Spacing of hotband from main peak diad2_HB2_min_offset=19-spec_res diad2_HB2_max_offset=23+spec_res # Spacing of HB from main peak. diad1_HB1_min_offset=19-spec_res diad1_HB1_max_offset=23+spec_res # Spacing of c13 from main peak diad2_C13_min_offset=16.5-spec_res diad2_C13_max_offset=20+spec_res peaks = find_peaks(y, height = config.height, prominence=config.prominence, width=config.width) # This gets the list of peak positions. peak_pos = x[peaks[0]] height = peaks[1]['peak_heights'] df=pd.DataFrame(data={'pos': peak_pos, 'height': height}) # Here, we are looking for peaks in the diad window. df_pks_diad1=df[(df['pos']>config.Diad1_window[0]) & (df['pos']<config.Diad1_window[1]) ] # Find peaks within the 2nd diad window df_pks_diad2=df[(df['pos']>config.Diad2_window[0]) & (df['pos']<config.Diad2_window[1]) ] # Find N highest peaks within range you have selected. df_sort_diad1=df_pks_diad1.sort_values('height', axis=0, ascending=False) if len(df_sort_diad1)>2: df_sort_diad1_trim=df_sort_diad1 #[0:2] else: df_sort_diad1_trim=df_sort_diad1 df_sort_diad2=df_pks_diad2.sort_values('height', axis=0, ascending=False) if len(df_sort_diad2)>3: df_sort_diad2_trim=df_sort_diad2 #[0:3] else: df_sort_diad2_trim=df_sort_diad2 # Check if any peaks lie where we expect the second diad right_pos_diad2=df_sort_diad2_trim['pos'].between(config.approx_diad2_pos[0], config.approx_diad2_pos[1]) if any(right_pos_diad2): manual_diad2=False df_sort_diad2_rightpos=df_sort_diad2_trim.loc[right_pos_diad2] diad_2_diad=df_sort_diad2_rightpos.loc[df_sort_diad2_rightpos['height']==np.max(df_sort_diad2_rightpos['height'])] df_out=pd.DataFrame(data={'filename': filename, 'Diad2_pos': diad_2_diad['pos'], 'Diad2_height': diad_2_diad['height']}) # Allocate hot band 2 position if any(df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]+diad2_HB2_min_offset, diad_2_diad['pos'].iloc[0]+diad2_HB2_max_offset)): diad_2_HB=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]+diad2_HB2_min_offset, diad_2_diad['pos'].iloc[0]+diad2_HB2_max_offset)] df_out['HB2_pos']=diad_2_HB['pos'].iloc[0] df_out['HB2_height']=diad_2_HB['height'].iloc[0] else: df_out['HB2_pos']=np.nan df_out['HB2_height']=np.nan # Look for C13 position if any(df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]-diad2_C13_max_offset, diad_2_diad['pos'].iloc[0]-diad2_C13_min_offset)): diad_2_C13=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]-diad2_C13_max_offset, diad_2_diad['pos'].iloc[0]-diad2_C13_min_offset)] df_out['C13_pos']=diad_2_C13['pos'].iloc[0] df_out['C13_height']=diad_2_C13['height'].iloc[0] else: df_out['C13_pos']=np.nan df_out['C13_height']=np.nan # If it didnt find anything where we expect Diad2 to be, find max position else: manual_diad2=True # Lets find the highest bit within this range diad2_range=(Diad[:, 0]>config.approx_diad2_pos[0]) & (Diad[:, 0]<config.approx_diad2_pos[1]) diad2_height=np.max(Diad[:, 1][diad2_range]) diad2_pos=Diad[:, 0][(Diad[:, 1]==diad2_height)&diad2_range] df_out=pd.DataFrame(data={'filename': filename, 'Diad2_pos': diad2_pos[0] , 'Diad2_height':diad2_height}, index=[0]) # Lets try to find the hotband and hope its here! if any(df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]+diad2_HB2_min_offset, df_out['Diad2_pos'].iloc[0]+diad2_HB2_max_offset)): diad_2_HB=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]+diad2_HB2_min_offset, df_out['Diad2_pos'].iloc[0]+diad2_HB2_max_offset)] df_out['HB2_pos']=diad_2_HB['pos'].iloc[0] df_out['HB2_height']=diad_2_HB['height'].iloc[0] else: df_out['HB2_pos']=np.nan df_out['HB2_height']=np.nan # Lets try and find the C13 peak if any(df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]-diad2_C13_max_offset, df_out['Diad2_pos'].iloc[0]-diad2_C13_min_offset)): diad_2_C13=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]-diad2_C13_max_offset, df_out['Diad2_pos'].iloc[0]-diad2_C13_min_offset)] df_out['C13_pos']=diad_2_C13['pos'].iloc[0] df_out['C13_height']=diad_2_C13['height'].iloc[0] else: df_out['C13_pos']=np.nan df_out['C13_height']=np.nan # Do the same for diad 1 right_pos_diad1=df_sort_diad1_trim['pos'].between(config.approx_diad1_pos[0], config.approx_diad1_pos[1]) if any(right_pos_diad1): manual_diad1=False df_sort_diad1_rightpos=df_sort_diad1_trim.loc[right_pos_diad1] diad_1_diad=df_sort_diad1_rightpos.loc[df_sort_diad1_rightpos['height']==np.max(df_sort_diad1_rightpos['height'])] df_out['Diad1_pos']=diad_1_diad['pos'].iloc[0] df_out['Diad1_height']=diad_1_diad['height'].iloc[0] if any(df_sort_diad1_trim['pos'].between(diad_1_diad['pos'].iloc[0]-diad1_HB1_max_offset, diad_1_diad['pos'].iloc[0]-diad1_HB1_min_offset)): diad_1_HB=df_sort_diad1_trim.loc[df_sort_diad1_trim['pos'].between(diad_1_diad['pos'].iloc[0]-diad1_HB1_max_offset, diad_1_diad['pos'].iloc[0]-diad1_HB1_min_offset)] df_out['HB1_pos']=diad_1_HB['pos'].iloc[0] df_out['HB1_height']=diad_1_HB['height'].iloc[0] else: df_out['HB1_pos']=np.nan df_out['HB1_height']=np.nan else: manual_diad1=True diad1_range=(Diad[:, 0]>config.approx_diad1_pos[0]) & (Diad[:, 0]<config.approx_diad1_pos[1]) diad1_height=np.max(Diad[:, 1][diad1_range]) diad1_pos=Diad[:, 0][(Diad[:, 1]==diad1_height)&diad1_range] df_out['Diad1_pos']= diad1_pos[0] df_out['Diad1_height']=diad1_height # Lets see if we can find a hotband in here now if any(df_sort_diad1_trim['pos'].between(df_out['Diad1_pos'].iloc[0]-diad1_HB1_max_offset, df_out['Diad1_pos'].iloc[0]-diad1_HB1_min_offset)): diad_1_HB=df_sort_diad1_trim.loc[df_sort_diad1_trim['pos'].between(df_out['Diad1_pos'].iloc[0]-diad1_HB1_max_offset, df_out['Diad1_pos'].iloc[0]-diad1_HB1_min_offset)] df_out['HB1_pos']=diad_1_HB['pos'].iloc[0] df_out['HB1_height']=diad_1_HB['height'].iloc[0] else: df_out['HB1_pos']=np.nan df_out['HB1_height']=np.nan # Lets get the prominence of the main peaks and hotbands now we have found them Diad_x=Diad[:, 0] Diad_y=Diad[:, 1] Med_LHS_diad1=np.nanmedian(Diad_y[(Diad[:, 0]>config.LH_bck_diad1[0])& (Diad[:, 0]<config.LH_bck_diad1[1])]) Med_RHS_diad1=np.nanmedian(Diad_y[(Diad[:, 0]>config.RH_bck_diad1[0])& (Diad[:, 0]<config.RH_bck_diad1[1])]) Med_LHS_diad2=np.nanmedian(Diad_y[(Diad[:, 0]>config.LH_bck_diad2[0])& (Diad[:, 0]<config.LH_bck_diad2[1])]) Med_RHS_diad2=np.nanmedian(Diad_y[(Diad[:, 0]>config.RH_bck_diad2[0])& (Diad[:, 0]<config.RH_bck_diad2[1])]) #Med_central_back_diad2=np.nanmedian(Diad[(Diad[:, 0]>1300)& (Diad[:, 0]<1350)] Diad_diad1=Diad_y[(Diad[:, 0]>1260)& (Diad[:, 0]<1300)] Diad_diad2=Diad_y[(Diad[:, 0]>1380)& (Diad[:, 0]<1400)] Med_bck_diad1=(Med_LHS_diad1+Med_RHS_diad1)/2 Med_bck_diad2=(Med_RHS_diad2+Med_LHS_diad2)/2 df_out['Diad1_Median_Bck']=Med_bck_diad1 df_out['Diad2_Median_Bck']=Med_bck_diad2 df_out['Diad1_abs_prom']=df_out['Diad1_height']-Med_bck_diad1 df_out['Diad2_abs_prom']=df_out['Diad2_height']-Med_bck_diad2 df_out['Diad1_rel_prom']=df_out['Diad1_height']/Med_bck_diad1 df_out['Diad2_rel_prom']=df_out['Diad2_height']/Med_bck_diad2 df_out['HB1_abs_prom']=df_out['HB1_height']-Med_LHS_diad1 df_out['HB2_abs_prom']=df_out['HB2_height']-Med_RHS_diad2 df_out['HB1_rel_prom']=df_out['HB1_height']/Med_LHS_diad1 df_out['HB2_rel_prom']=df_out['HB2_height']/Med_RHS_diad2 df_out['approx_split']=df_out['Diad2_pos']-df_out['Diad1_pos'] # Lets get the C13 prominence if any(df_out['C13_pos']>-500): C13_back=np.quantile(Diad_y[ ((Diad_x>(df_out['C13_pos'].iloc[0]-3))&(Diad_x<(df_out['C13_pos'].iloc[0]+3))) ], 0.25) df_out['C13_abs_prom']=df_out['C13_height']-C13_back df_out['C13_rel_prom']=df_out['C13_height']/C13_back df_out['C13_HB2_abs_prom_ratio']=df_out['HB2_abs_prom']/df_out['C13_abs_prom'] else: df_out['C13_abs_prom']=np.nan df_out['C13_rel_prom']=np.nan df_out['C13_HB2_abs_prom_ratio']=np.nan # Lets get the prominence of the valley between hotbands and the diad # Find the minimum intensity between the diad and HB if not np.isnan(df_out['HB2_pos'].iloc[0]): Diad2_between=Diad[(Diad[:, 0]<df_out['HB2_pos'].iloc[0])& (Diad[:, 0]>df_out['Diad2_pos'].iloc[0])] Min_y_midpoint2=np.min(Diad2_between[:, 1]) # Option here RHS_x_Diad2=(df_out['HB2_pos'].iloc[0])+20 RHS_y_Diad2=np.median(Diad[:, 1][ (Diad[:, 0]<RHS_x_Diad2+2*spec_res) &(Diad[:, 0]>RHS_x_Diad2-2*spec_res)]) df_out['Diad2_HB2_Valley_prom']=Min_y_midpoint2/RHS_y_Diad2 else: df_out['Diad2_HB2_Valley_prom']=np.nan if not np.isnan(df_out['HB1_pos'].iloc[0]): Diad1_between=Diad[(Diad[:, 0]>df_out['HB1_pos'].iloc[0])& (Diad[:, 0]<df_out['Diad1_pos'].iloc[0])] Min_y_midpoint1=np.min(Diad1_between[:, 1]) # option here seems to work better LHS_x_Diad1=(df_out['HB1_pos'].iloc[0])-20 LHS_y_Diad1=np.median(Diad[:, 1][ (Diad[:, 0]<LHS_x_Diad1+2.1*spec_res) &(Diad[:, 0]>LHS_x_Diad1-2.1*spec_res)]) df_out['Diad1_HB1_Valley_prom']=Min_y_midpoint1/LHS_y_Diad1 else: df_out['Diad1_HB1_Valley_prom']=np.nan # Divide by the median background position found above # Other useful params # Mean of HB elevation. df_out['Mean_Diad_HB_Valley_prom']=(df_out['Diad2_HB2_Valley_prom']+df_out['Diad1_HB1_Valley_prom'])/2 # Mean HB prominence df_out['Mean_abs_HB_prom']=(df_out['HB1_abs_prom']+df_out['HB2_abs_prom']) df_out['Diad2_HB2_abs_prom_ratio']=df_out['Diad2_abs_prom']/df_out['HB2_abs_prom'] df_out['Diad1_HB1_abs_prom_ratio']=df_out['Diad1_abs_prom']/df_out['HB1_abs_prom'] # Average these df_out['Av_Diad_HB_prom_ratio']=(df_out['Diad2_HB2_abs_prom_ratio']+df_out['Diad1_HB1_abs_prom_ratio'])/2 # Parameter for amount of noise between diads vs. height of peaks between_diads_x=(x>diad_id_config.RH_bck_diad1[0])&(x<diad_id_config.LH_bck_diad2[1]) std_bet_diad=np.std(y[between_diads_x]) df_out['Diad1_prom/std_betweendiads']=df_out['Diad1_abs_prom']/std_bet_diad df_out['Diad2_prom/std_betweendiads']=df_out['Diad2_abs_prom']/std_bet_diad df_out['HB1_prom/std_betweendiads']=df_out['HB1_abs_prom']/std_bet_diad df_out['HB2_prom/std_betweendiads']=df_out['HB2_abs_prom']/std_bet_diad # New ones df_out['Av_Diad_prom/std_betweendiads']=(df_out['Diad1_prom/std_betweendiads']+df_out['Diad2_prom/std_betweendiads'])/2 df_out['C13_prom/HB2_prom']=df_out['C13_abs_prom']/df_out['HB2_abs_prom'] df_out['Left_vs_Right']=Med_LHS_diad1/Med_RHS_diad2 # Lets sort based on columns we want near each other cols_to_move = ['filename', 'approx_split', 'Diad1_pos', 'Diad2_pos','HB1_pos', 'HB2_pos', 'C13_pos', 'Diad1_abs_prom', 'Diad2_abs_prom', 'HB1_abs_prom', 'HB2_abs_prom', 'C13_abs_prom', 'Mean_abs_HB_prom', 'Diad2_HB2_abs_prom_ratio', 'Diad1_HB1_abs_prom_ratio', 'Diad1_rel_prom', 'Diad2_rel_prom', 'HB1_rel_prom', 'HB2_rel_prom', 'C13_rel_prom', 'Diad1_HB1_Valley_prom', 'Mean_Diad_HB_Valley_prom', 'Diad1_prom/std_betweendiads', 'Diad2_prom/std_betweendiads', 'Av_Diad_prom/std_betweendiads', 'C13_prom/HB2_prom', 'Av_Diad_HB_prom_ratio', 'Left_vs_Right'] df_out = df_out[cols_to_move + [ col for col in df_out.columns if col not in cols_to_move]] # Now lets get approximate strength subtracting the backgrround # Now lets make a plot if plot_figure is True: fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(12,4)) ax0.plot(Diad[:, 0], Diad[:, 1], '-r') ax0.plot(df['pos'], df['height'], '*k') ax1.plot(df['pos'], df['height'], '*k', label='All Scipy Peaks') ax2.plot(df['pos'], df['height'], '*k') if manual_diad1 is True: ax1.plot(diad1_pos[0], diad1_height, 'dk', mfc='yellow', ms=7, label='SciPyMissed') if manual_diad2 is True: ax1.plot(diad2_pos[0], diad2_height, 'dk', mfc='yellow', ms=7) #ax0.legend() ax1.set_title('Diad1') ax1.plot(Diad[:, 0],Diad[:, 1], '-r') ax1.set_xlim([config.Diad1_window[0], config.Diad1_window[1]]) ax2.set_title('Diad2') ax2.plot(Diad[:, 0],Diad[:, 1], '-r') ax2.set_xlim([config.Diad2_window[0], config.Diad2_window[1]]) #ax0.set_ylim[np.nanmin(Diad[:, 1]), np.nanmax(Diad[:, 1]) ]) fig.tight_layout() ax2.plot(df_sort_diad2_trim['pos'], df_sort_diad2_trim['height'], '*k', mfc='yellow', ms=10) ax1.plot(df_sort_diad1_trim['pos'], df_sort_diad1_trim['height'], '*k', mfc='yellow', ms=10, label='Selected Pks') ax1.legend() ax0.set_xlabel('Wavenumber') ax0.set_ylabel('Intensity') ax1.set_xlabel('Wavenumber') ax1.set_ylabel('Intensity') ax2.set_xlabel('Wavenumber') ax2.set_ylabel('Intensity') for i in range(0, len(df_sort_diad1_trim)): ax1.annotate(str(np.round(df_sort_diad1_trim['pos'].iloc[i], 1)), xy=(df_sort_diad1_trim['pos'].iloc[i]-10, df_sort_diad1_trim['height'].iloc[i]-1/7*(df_sort_diad1_trim['height'].iloc[i]-700)), xycoords="data", fontsize=10, rotation=90) for i in range(0, len(df_sort_diad2_trim)): ax2.annotate(str(np.round(df_sort_diad2_trim['pos'].iloc[i], 1)), xy=(df_sort_diad2_trim['pos'].iloc[i]-10, df_sort_diad2_trim['height'].iloc[i]-1/7*(df_sort_diad2_trim['height'].iloc[i]-700)), xycoords="data", fontsize=10, rotation=90) return df_out, Diad, fig if 'fig' in locals() else None
[docs] def loop_approx_diad_fits(*, spectra_path, config, Diad_Files, filetype, plot_figure): """ Uses the identify_diad_peaks function to loop over all files. Parameters ---------------- config: dataclass See documentation for identify_diad_peaks spectra_path: str Path where spectra is stored Diad_Files: list List of file names to loop over filetype: str choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' plot_figure: bool if True, plots a figure of peaks, and the identified peaks that Scipy found Returns ----------- pd.DataFrame of the approximate peak parameters for each file. np.array of y coordinates of every spectra. """ # Do fit for first file to get length df_peaks, Diad, fig=identify_diad_peaks( config=config, path=spectra_path, filename=Diad_Files[0], filetype=filetype, plot_figure=plot_figure) # Now do for all files fit_params = pd.DataFrame([]) x_cord=Diad[:, 0] data_y_all=np.zeros([ len(x_cord), len(Diad_Files)], float) data_x_all=np.zeros([ len(x_cord), len(Diad_Files)], float) i=0 for file in tqdm(Diad_Files): df_peaks, Diad, fig=identify_diad_peaks( config=config, path=spectra_path, filename=file, filetype=filetype, plot_figure=plot_figure) data_y_all[:, i]=Diad[:, 1] data_x_all[:, i]=Diad[:, 0] #data = pd.concat([Diad, data], axis=0) fit_params = pd.concat([fit_params, df_peaks], axis=0) i=i+1 fit_params=fit_params.reset_index(drop=True) # Lets check all data x are the same. a=data_x_all same = np.all(a[0, :] == a[0, 0]) if same==True: return fit_params, data_y_all if same==False: print('You do not have the same x axis for all spectra. ') diff_val_row = np.where(np.any(a != a[0], axis=1))[0] print(diff_val_row) return fit_params, data_y_all
[docs] def plot_peak_params(fit_params, x_param='Diad1_pos', y1_param='approx_split', y2_param='Mean_Valley_prom', y3_param='C13_abs_prom', y4_param='HB2_abs_prom', fill_na=0): """ Makes a 2X2 figure of fit parameters, with the same x axis, and 4 different y axis Parameters ----------- fit_params: Pandas DataFrame dataframe of approximate fit params from function loop_approx_diad_fits x_param: str parameter you want on the x axis of all plots y1_param: str parameter on y axis of 1st subplot (top left) y2_param: str parameter on y axis of 2nd subplot (top right) y3_param: str parameter on y axis of 3rd subplot (bottom left) y4_param: str parameter on y axis of 4th subplot (bottom right) fill_na: int The integer value to fill Nans with to show up on plots. Returns ---------- 4 part figure """ fit_params_nona=fit_params.fillna(fill_na) fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10*0.8,8*0.8)) ax1.plot(fit_params_nona[x_param], fit_params_nona[y1_param], 'xr') ax1.set_xlabel(x_param) ax1.set_ylabel(y1_param) ax2.plot(fit_params_nona[x_param], fit_params_nona[y2_param], 'xr') ax2.set_xlabel(x_param) ax2.set_ylabel(y2_param) fig.tight_layout() ax3.plot(fit_params_nona[x_param], fit_params_nona[y3_param], 'xr') ax3.set_xlabel(x_param) ax3.set_ylabel(y3_param) ax4.plot(fit_params_nona[x_param], fit_params_nona[y4_param], 'xr') ax4.set_xlabel(x_param) ax4.set_ylabel(y4_param) fig.tight_layout() return fig
[docs] def filter_splitting_prominence(*, fit_params, data_y_all, x_cord, splitting_limits=[100, 107], lower_diad1_prom=10, exclude_str=None, str_filt=None): """ Filters Spectra based on approximate splitting, draws a plot showing spectra to discard and those to keep Parameters -------------- fit_params: pd.dataframe dataframe of fit parameters from loop_approx_diad_fits data_y_all: np.array y coordinates of each spectra from loop_approx_diad_fits, used for plotting visualizatoins x_cord: np.array x coordinates of 1 spectra. Assumes all x coordinates the same length splitting_limits: list (e.g. [100, 107]) only keeps spectra with splitting in this range lower_diad1_prom: int, float Only keeps spectra that meet the splitting parameter, and have an absolute diad1 prominence greater than this value (helps filter out other weird spectra) exclude_str: str Excludes files with this string. str_filt: str Filters just based on string in filename Returns -------------- fit_params_filt: pd.DataFrame dataframe of fit parameters for spectra to keep data_y_filt: np.array y coordinates of spectra to keep fit_params_disc: pd.DataFrame dataframe of fit parameters for spectra to discard data_y_disc: np.array y coordinates of spectra to discard """ if str_filt is not None: filt=fit_params['filename'].str.contains(str_filt) else: reas_split=(fit_params['approx_split'].between(splitting_limits[0], splitting_limits[1])) reas_heigh=fit_params['Diad1_abs_prom']>lower_diad1_prom if exclude_str is not None: name_in_file=~fit_params['filename'].str.contains(exclude_str) else: name_in_file=reas_heigh filt=reas_split&reas_heigh&name_in_file fit_params_filt=fit_params.loc[filt].reset_index(drop=True) fit_params_disc=fit_params.loc[~(filt)].reset_index(drop=True) print('Keeping N='+str(len(fit_params_filt))) print('Discarding N='+str(len(fit_params_disc))) # Then apply to get data data_y_filt=data_y_all[:, (filt)] data_y_disc=data_y_all[:, ~(filt)] intc=800 prom_filt=0 prom_disc=0 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,5)) ax1.set_title('Spectra to Discard') ax2.set_title('Spectra to Keep') if sum(~filt)>0: for i in range(0, np.shape(data_y_disc)[1]): av_prom_disc=np.abs(np.nanmedian(fit_params_disc['Diad1_abs_prom'])/intc) Diff=np.nanmax(data_y_disc[:, i])-np.nanmin(data_y_disc[:, i]) av_prom_Keep=fit_params_disc['Diad1_abs_prom'].iloc[i] prom_disc=prom_disc+av_prom_disc ax1.plot(x_cord+i*5, (data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, '-r', lw=0.5) yplot=np.quantile((data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, 0.65) file=fit_params_disc['filename'].iloc[i] file = file.replace("_CRR_DiadFit", "") ax1.annotate(str(file), xy=(1450+i*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) ax1.set_xlim([1250, 1500+i*5]) ax1.set_xticks([]) ax1.set_yticks([]) if sum(filt)>0: for i in range(0, np.shape(data_y_filt)[1]): Diff=np.nanmax(data_y_filt[:, i])-np.nanmin(data_y_filt[:, i]) av_prom_Keep=fit_params_filt['Diad1_abs_prom'].iloc[i] prom_filt=prom_filt+av_prom_Keep file=fit_params_filt['filename'].iloc[i] ax2.plot(x_cord+i*5, (data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, '-b', lw=0.5) yplot=np.quantile((data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, 0.65) ax2.annotate(str(file), xy=(1450+i*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) ax2.set_xlim([1250, 1450+i*5]) ax2.set_xticks([]) ax2.set_yticks([]) return fit_params_filt.reset_index(drop=True), data_y_filt, fit_params_disc.reset_index(drop=True), data_y_disc
[docs] def filter_by_string(*, fit_params, data_y_all, x_cord, str_filt=None): """ Filters Spectra based on approximate splitting, draws a plot showing spectra to discard and those to keep Parameters -------------- fit_params: pd.dataframe dataframe of fit parameters from loop_approx_diad_fits data_y_all: np.array y coordinates of each spectra from loop_approx_diad_fits, used for plotting visualizatoins x_cord: np.array x coordinates of 1 spectra. Assumes all x coordinates the same length str_filt: str Filters just based on string in filename. Keeps files with string in, discards those without. Returns -------------- fit_params_filt: pd.DataFrame dataframe of fit parameters for spectra to keep data_y_filt: np.array y coordinates of spectra to keep fit_params_disc: pd.DataFrame dataframe of fit parameters for spectra to discard data_y_disc: np.array y coordinates of spectra to discard """ filt=fit_params['filename'].str.contains(str_filt) fit_params_filt=fit_params.loc[filt].reset_index(drop=True) fit_params_disc=fit_params.loc[~(filt)].reset_index(drop=True) print('Keeping N='+str(len(fit_params_filt))) print('Discarding N='+str(len(fit_params_disc))) # Then apply to get data data_y_filt=data_y_all[:, (filt)] data_y_disc=data_y_all[:, ~(filt)] intc=800 prom_filt=0 prom_disc=0 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,5)) ax1.set_title('Samples') ax2.set_title('Standards') if sum(~filt)>0: for i in range(0, np.shape(data_y_disc)[1]): av_prom_disc=np.abs(np.nanmedian(fit_params_disc['Diad1_abs_prom'])/intc) Diff=np.nanmax(data_y_disc[:, i])-np.nanmin(data_y_disc[:, i]) av_prom_Keep=fit_params_disc['Diad1_abs_prom'].iloc[i] prom_disc=prom_disc+av_prom_disc ax1.plot(x_cord+i*5, (data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, '-r', lw=0.5) yplot=np.quantile((data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, 0.65) file=fit_params_disc['filename'].iloc[i] file = file.replace("_CRR_DiadFit", "") ax1.annotate(str(file), xy=(1450+i*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) ax1.set_xlim([1250, 1500+i*5]) ax1.set_xticks([]) ax1.set_yticks([]) if sum(filt)>0: for i in range(0, np.shape(data_y_filt)[1]): Diff=np.nanmax(data_y_filt[:, i])-np.nanmin(data_y_filt[:, i]) av_prom_Keep=fit_params_filt['Diad1_abs_prom'].iloc[i] prom_filt=prom_filt+av_prom_Keep file=fit_params_filt['filename'].iloc[i] ax2.plot(x_cord+i*5, (data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, '-b', lw=0.5) yplot=np.quantile((data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, 0.65) ax2.annotate(str(file), xy=(1450+i*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) ax2.set_xlim([1250, 1450+i*5]) ax2.set_xticks([]) ax2.set_yticks([]) return fit_params_filt.reset_index(drop=True), data_y_filt, fit_params_disc.reset_index(drop=True), data_y_disc
[docs] def identify_diad_group(*, fit_params, data_y, x_cord, filter_bool,y_fig_scale=0.1, grp_filter='Weak', str_filt=None): """Sorts diads into two groups. Those meeting the 'filter_bool' criteria, and those not meeting this criteria. Ones meeting the criteria are shown on the left hand plot, ones not meeting the criteria are shown in the right hand plot Parameters ----------------- fit_params: pd.DataFrame dataframe of approximate peak parameters from loop_approx_diad_fits data_y: np.array y coordinates of each file in fit_params x_cord: np.array x coordinate of 1 spectra. Assumes all spectra have the same xcoordinate filter_bool: Pandas.series of bool (e.g. True, False, True, False) This is the filter to use to subdivide spectra into two groups. y_fig_scale: float, int (0.1) Determins how far apart the spectra are displayed on the plot. Larger number, the longer the plot is (and easier spectra are to see). The total height of the figure is 2+y_fig_scale*number of spectra grp_filter: str The name of the group where the filter_bool is true. E.g. if you apply a filter to get the weak diads, enter 'Weak' here. If filter_bool is to identify Strong spectra, enter 'Medium-Strong' here. Returns ----------------- pd.DataFrame of fit parameters where filter_bool is True pd.DataFrame of fit parameters where filter_bool is False np.array of y coordinates where filter_bool is True np.array of y coordinates where filter_bool is False """ if str_filt is not None: filt_name=~fit_params['filename'].str.contains(str_filt) filt_bool=filt_name&filt_bool if np.shape(data_y)[1]==0: Group1_df=pd.DataFrame().reindex_like(fit_params) Groupnot1_df=pd.DataFrame().reindex_like(fit_params) Group1_np_y=np.zeros(0, dtype='float') Groupnot1_np_y=np.zeros(0, dtype='float') return Group1_df, Groupnot1_df, Group1_np_y, Groupnot1_np_y else: # Grp1 is where the bool is true. grp1=filter_bool fit_params_notgrp1=fit_params.loc[~grp1] # Find ones in group1, in dataframe and numpy form Group1_df=fit_params.loc[grp1]#.reset_index(drop=True) Group1_df_reset=fit_params.loc[grp1].reset_index(drop=True) index_Grp1=Group1_df.index Group1_np_y=data_y[:, index_Grp1] # Ones not in group1 Groupnot1_df=fit_params.loc[~grp1]#.reset_index(drop=True) Groupnot1_df_reset=fit_params.loc[~grp1].reset_index(drop=True) index_Grpnot1=Groupnot1_df.index Groupnot1_np_y=data_y[:, index_Grpnot1] fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(12, 2+y_fig_scale*len(fit_params))) intc=8 # if sum(grp1)>0: if sum(grp1)==1: i=0 av_prom_disc=np.abs(np.nanmedian(Group1_df['Diad1_abs_prom'])/intc) Diff=np.nanmax(Group1_np_y[:, i])-np.nanmin(Group1_np_y[:, i]) ax0.plot(x_cord-i*5, (Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, '-r', lw=0.5) yplot=np.quantile((Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, 0.65) file=Group1_df_reset['filename'].iloc[i] file = file.replace("_CRR_DiadFit", "") ax0.annotate(str(file), xy=(1450-i*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) else: for i in range(0, np.shape(Group1_np_y)[1]): av_prom_disc=np.abs(np.nanmedian(Group1_df['Diad1_abs_prom'])/intc) Diff=np.nanmax(Group1_np_y[:, i])-np.nanmin(Group1_np_y[:, i]) ax0.plot(x_cord-i*5, (Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, '-r', lw=0.5) yplot=np.quantile((Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, 0.5) file=Group1_df_reset['filename'].iloc[i] file = file.replace("_CRR_DiadFit", "") ax0.annotate(str(file), xy=(1450-i*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) ax0.set_xlim([1250-i*5, 1550]) ax0.set_xticks([]) ax0.set_yticks([]) # av_prom_Group1=np.abs(np.nanmedian(Group1_df[x_param])/intc) # ax0.plot(x_cord, Group1_np_y[:, i]+av_prom_Group1*i, '-r') if sum(~grp1)>0: if sum(~grp1)==1: j=0 av_prom_disc=np.abs(np.nanmedian(Groupnot1_df['Diad1_abs_prom'])/intc) Diff=np.nanmax(Groupnot1_np_y[:, j])-np.nanmin(Groupnot1_np_y[:, j]) ax1.plot(x_cord-j*5, (Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, '-k', lw=0.5) yplot=np.quantile((Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, 0.65) file=Groupnot1_df_reset['filename'].iloc[j] file = file.replace("_CRR_DiadFit", "") ax1.annotate(str(file), xy=(1450-j*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) else: for j in range(0, np.shape(Groupnot1_np_y)[1]): av_prom_disc=np.abs(np.nanmedian(Groupnot1_df['Diad1_abs_prom'])/intc) Diff=np.nanmax(Groupnot1_np_y[:, j])-np.nanmin(Groupnot1_np_y[:, j]) ax1.plot(x_cord-j*5, (Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, '-k', lw=0.5) yplot=np.quantile((Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, 0.5) file=Groupnot1_df_reset['filename'].iloc[j] file = file.replace("_CRR_DiadFit", "") ax1.annotate(str(file), xy=(1450-j*5, yplot), xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2)) ax1.set_xlim([1250-j*5, 1550]) ax1.set_xticks([]) ax1.set_yticks([]) if grp_filter=='Medium-Strong': ax0.set_title('Classified as Strong') ax1.set_title('Classified as Medium') if grp_filter=='Weak': ax0.set_title('Classified as Weak') ax1.set_title('Ones left (not classified yet)') plt.subplots_adjust(wspace=0) return Group1_df.reset_index(drop=True), Groupnot1_df.reset_index(drop=True),Group1_np_y, Groupnot1_np_y
[docs] def plot_diad_groups(*, x_cord, Weak_np=None, Medium_np=None, Strong_np=None, y_fig_scale=0.5): """ This makes a figure showing spectra classified into Weak, Medium and Strong groups on a 3 panel plot Parameters -------------- x_cord: np.array x_coordinate for all spectra (must be the same) Weak_np: np.array y coordinates of spectra classified as weak Medium_np: np.array y coordinates of spectra classified as medium Strong_np: np.array y coordinates of spectra classified as strong y_fig_scale: float The total height of the figure is 2+y_fig_scale*total number of spectra entered. Returns ------------- figure showing 3 diad groups. """ # if len(Weak_np)>0: Num_Weak=np.shape(Weak_np)[1] else: Num_Weak=0 if len(Medium_np)>0: Num_Medium=np.shape(Medium_np)[1] else: Num_Medium=0 if len(Strong_np)>0: Num_Strong=np.shape(Strong_np)[1] else: Num_Strong=0 Total=Num_Strong+Num_Medium+Num_Weak fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(15,2+y_fig_scale*Total)) if Num_Weak>0: for i in range(0, np.shape(Weak_np)[1]): Diff=np.nanmax(Weak_np[:, i])-np.nanmin(Weak_np[:, i]) ax0.plot(x_cord-i*5, (Weak_np[:, i]-np.nanmin(Weak_np[:, i]))/Diff+i/3, '-r', lw=0.5) ax0.set_xlim([1250-i*5, 1450]) ax0.set_xticks([]) ax0.set_yticks([]) if Num_Medium>0: for i in range(0, np.shape(Medium_np)[1]): Diff=np.nanmax(Medium_np[:, i])-np.nanmin(Medium_np[:, i]) ax1.plot(x_cord-i*5, (Medium_np[:, i]-np.nanmin(Medium_np[:, i]))/Diff+i/3, '-b', lw=0.5) ax1.set_xlim([1250-i*5, 1450]) ax1.set_xticks([]) ax1.set_yticks([]) if Num_Strong>0: for i in range(0, np.shape(Strong_np)[1]): Diff=np.nanmax(Strong_np[:, i])-np.nanmin(Strong_np[:, i]) ax2.plot(x_cord-i*5, (Strong_np[:, i]-np.nanmin(Strong_np[:, i]))/Diff+i/3, '-g', lw=0.5) ax2.set_xlim([1250-i*5, 1450]) ax2.set_xticks([]) ax2.set_yticks([]) ax0.set_title('Weak, N='+str(Num_Weak)) ax1.set_title('Medium, N='+str(Num_Medium)) ax2.set_title('Strong, N='+str(Num_Strong)) plt.subplots_adjust(wspace=0) return fig
[docs] def remove_diad_baseline(*, path=None, filename=None, Diad_files=None, filetype='Witec_ASCII', diad_array=None, exclude_range1=None, exclude_range2=None,N_poly=1, x_range_baseline=10, lower_bck=[1200, 1250], upper_bck=[1320, 1330], sigma=4, plot_figure=True): """ This function fits an Nth degree polynomial fitted through spectra points between two background positions. It returns baseline-subtracted data Parameters ----------- path: str Folder where spectra is stored filename: str Specific file being read OR Diad_files: str Filename if in same folder as script. filetype: str Identifies type of file. choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' exclude_range1: None or list length 2 Excludes a region, e.g. a cosmic ray exclude_range2: None or list length 2 Excludes a region, e.g. a cosmic ray sigma: int Filters out points that are more than this number of sigma from the median of the baseline. N_poly: int Degree of polynomial to fit to the backgroun lower_bck: list len 2 wavenumbers of baseline region to the left of the peak of interest upper_bck: list len 2 wavenumbers to the right of the peak of interest plot_figure: bool if True, plots figure, if False, doesn't. Returns ----------- y_corr, Py_base, x, Diad_short, Py_base, Pf_baseline, Baseline_ysub, Baseline_x, Baseline, span y_corr: Background subtracted y values trimmed in baseline range x: initial x values trimmed in baseline range Diad_short: Initial data (x and y) trimmed in baseline range Py_base: Fitted baseline for trimmed x coordinates Pf_baseline: polynomial model fitted to baseline Baseline_ysub: Baseline fitted for x coordinates in baseline Baseline_x: x co-ordinates in baseline. Baseline: Filtered to remove points outside sigma*std dev of baseline span: lower and upper x coordinate of region that isnt the baseline """ if diad_array is None: Diad_df=get_data(path=path, filename=filename, filetype=filetype) Diad=np.array(Diad_df) else: Diad=diad_array if exclude_range1 is not None and exclude_range2 is None: Diad_old=Diad.copy() Diad=Diad[(Diad[:, 0]<exclude_range1[0])|(Diad[:, 0]>exclude_range1[1])] Discard=Diad_old[(Diad_old[:, 0]>=exclude_range1[0]) & (Diad_old[:, 0]<=exclude_range1[1])] if exclude_range2 is not None and exclude_range1 is None: Diad_old=Diad.copy() Diad=Diad[(Diad[:, 0]<exclude_range2[0])|(Diad[:, 0]>exclude_range2[1])] Discard=Diad_old[(Diad_old[:, 0]>=exclude_range2[0]) & (Diad_old[:, 0]<=exclude_range2[1])] if exclude_range1 is not None and exclude_range2 is not None: Diad_old=Diad.copy() Diad=Diad[ ((Diad[:, 0]<exclude_range1[0])|(Diad[:, 0]>exclude_range1[1])) & ((Diad[:, 0]<exclude_range2[0])|(Diad[:, 0]>exclude_range2[1])) ] Discard=Diad_old[ ((Diad_old[:, 0]>=exclude_range1[0]) & (Diad_old[:, 0]<=exclude_range1[1])) | ((Diad_old[:, 0]>=exclude_range2[0]) & (Diad_old[:, 0]<=exclude_range2[1])) ] lower_0baseline=lower_bck[0] upper_0baseline=lower_bck[1] lower_1baseline=upper_bck[0] upper_1baseline=upper_bck[1] # Bit that is actually peak, not baseline span=[upper_0baseline, lower_1baseline] # lower_2baseline=1320 # upper_2baseline=1330 # Trim for entire range Diad_short=Diad[ (Diad[:,0]>lower_0baseline) & (Diad[:,0]<upper_1baseline) ] # Get actual baseline Baseline_with_outl=Diad_short[ ((Diad_short[:, 0]<upper_0baseline) &(Diad_short[:, 0]>lower_0baseline)) | ((Diad_short[:, 0]<upper_1baseline) &(Diad_short[:, 0]>lower_1baseline))] # Calculates the median for the baseline and the standard deviation Median_Baseline=np.mean(Baseline_with_outl[:, 1]) Std_Baseline=np.std(Baseline_with_outl[:, 1]) # Removes any points in the baseline outside of 2 sigma (helps remove cosmic rays etc). Baseline=Baseline_with_outl[(Baseline_with_outl[:, 1]<Median_Baseline+sigma*Std_Baseline) & (Baseline_with_outl[:, 1]>Median_Baseline-sigma*Std_Baseline) ] #Baseline=Baseline_with_outl # Fits a polynomial to the baseline of degree # Fits a polynomial to the baseline of degree Pf_baseline = np.poly1d(np.polyfit(Baseline[:, 0], Baseline[:, 1], N_poly)) Py_base = Pf_baseline(Diad_short[:, 0]) Baseline_ysub = Pf_baseline(Baseline[:, 0]) Baseline_x = Baseline[:, 0] y_corr = Diad_short[:, 1] - Py_base x = Diad_short[:, 0] # Now filter out all the points I dont want in my background subtracted data # 1) points rejected by the sigma filter (from the baseline-candidate set) sigma_rejected_x = Baseline_with_outl[ (Baseline_with_outl[:, 1] >= Median_Baseline + sigma * Std_Baseline) | (Baseline_with_outl[:, 1] <= Median_Baseline - sigma * Std_Baseline) ][:, 0] drop = np.zeros(len(x), dtype=bool) # 2) points in user-specified exclude ranges (if provided) if exclude_range1 is not None: drop |= (x >= exclude_range1[0]) & (x <= exclude_range1[1]) if exclude_range2 is not None: drop |= (x >= exclude_range2[0]) & (x <= exclude_range2[1]) # 3) points sigma-filtered out of the baseline windows if sigma_rejected_x.size > 0: drop |= np.isin(x, sigma_rejected_x) # apply the mask consistently to all aligned arrays/outputs keep = ~drop x = x[keep] y_corr = y_corr[keep] Py_base = Py_base[keep] Diad_short = Diad_short[keep, :] # Plotting what its doing if plot_figure is True: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4)) ax1.set_title('Background fit') ax1.plot(Diad[:, 0], Diad[:, 1], '-', color='grey') ax1.plot(Diad_short[:, 0], Diad_short[:, 1], '-r', label='region_2_bck_sub') #ax1.plot(Baseline[:, 0], Baseline[:, 1], '-b', label='Bck points') ax1.plot(Baseline[:, 0], Baseline[:, 1], '.b', label='Bck points') ax1.plot(Diad_short[:, 0], Py_base, '-k') ax1_ymin=np.min(Baseline[:, 1])-10*np.std(Baseline[:, 1]) ax1_ymax=np.max(Baseline[:, 1])+10*np.std(Baseline[:, 1]) ax1_xmin=lower_0baseline-30 ax1_xmax=upper_1baseline+30 # Adding patches rect_diad1_b1=patches.Rectangle((lower_0baseline, ax1_ymin),upper_0baseline-lower_0baseline,ax1_ymax-ax1_ymin, linewidth=1,edgecolor='none',facecolor='cyan', label='bck', alpha=0.3, zorder=0) ax1.add_patch(rect_diad1_b1) rect_diad1_b2=patches.Rectangle((lower_1baseline, ax1_ymin),upper_1baseline-lower_1baseline,ax1_ymax-ax1_ymin, linewidth=1,edgecolor='none',facecolor='cyan', alpha=0.3, zorder=0) ax1.add_patch(rect_diad1_b2) ax1.set_xlim([ax1_xmin, ax1_xmax]) ax1.set_ylim([ax1_ymin, ax1_ymax]) ax1.set_ylabel('Intensity') ax2.set_ylabel('Intensity') ax2.set_xlabel('Wavenumber') ax1.legend() ax2.set_title('Background subtracted') ax2.plot(x, y_corr, '-r') height_p=np.max(Diad_short[:, 1])-np.min(Diad_short[:, 1]) ax2.set_ylim([np.min(y_corr), 1.2*height_p ]) ax1.set_xlabel('Wavenumber') return y_corr, Py_base, x, Diad_short, Py_base, Pf_baseline, Baseline_ysub, Baseline_x, Baseline, span
[docs] def add_peak(*, prefix=None, center=None, model_name='VoigtModel', min_cent=None, max_cent=None, min_sigma=None, max_sigma=None, amplitude=100, min_amplitude=None, max_amplitude=None, sigma=0.2): """ This function iteratively adds a peak using lmfit, used for iteratively fitting HB, C13 etc. Parameters -------------- prefix: str Prefix added to peak name, e.g. 'diad1', so it reads diad1_center, diad1_sigma etc. center: float, int Estimate of peak center (e.g. x coordinate of peak) sigma: float, int (default 0.2) Estimate of sigma of peak fit amplitude: float, int Estimate of amplitude of peakf it model_name: str 'VoigtModel' or 'PseudoVoigtModel' Type of peak which is being added. Optional: min_cent, max_cent: float Minimum and maximum x coordinate to place peak center at min_sigma, max_sigma: float Minimum and maximum sigma of peak fit min_amplitude, max_amplitude: float Minimum and maximum sigma of peak fit Returns --------------- peak: Fitted model params: Peak fit parameters """ Model_combo = globals()[model_name](prefix=prefix) peak = Model_combo pars = peak.make_params() if min_cent is not None and max_cent is not None: pars[prefix + 'center'].set(center, min=min_cent, max=max_cent) else: pars[prefix + 'center'].set(center) pars[prefix + 'amplitude'].set(amplitude, min=min_amplitude, max=max_amplitude) if min_sigma is not None: pars[prefix+'sigma'].set(sigma, max=max_sigma) if min_sigma is not None and max_sigma is not None: pars[prefix+'sigma'].set(sigma, max=max_sigma, min=min_sigma) else: pars[prefix + 'sigma'].set(sigma, min=0) return peak, pars
## Overall function for fitting diads in 1 single step
[docs] @dataclass class diad1_fit_config: """ Configuration class for fitting Diad 1 This class stores configuration parameters for fitting Diad 1 and associated peaks. The class allows for easy customization of the fitting process. Attributes: model_name (str): Default 'PseudoVoigtModel'. Model name for peak fitting. Choose from 'PseudoVoigtModel', 'VoigtModel', 'GaussianModel', 'LorentzianModel', etc. fit_peaks (int): Default 2 Number of peaks to fit. 2 = Diad + HB, 1 = Diad. N_poly_bck_diad1 (float): Default = 1 Degree of polynomial for background subtraction. Default 1. lower_bck_diad1 (Tuple[float, float]): Lower boundary for background fitting in cm-1. Default (1180, 1220) upper_bck_diad1 (Tuple[float, float]): Upper boundary for background fitting in cm-1 Default (1300, 1350). fit_gauss (Optional[bool]): Default True Fit a Gaussian peak if True. Helpful for very elevated spectra gauss_amp (Optional[float]): Default = 1000 Gaussian peak amplitude if `fit_gauss` is True. Default 1000. We find 2X HB intensity is a good guess on many instruments diad_sigma (float): Default = 0.2 Sigma of diad peak. diad_sigma_min_allowance (float): Default = 0.2 Tolerance on entered sigma. Means minimum allowed sigma is 0.2*sigma diad_sigma_max_allowance (float): Default = 5 Tolerance on entered sigma. Means minimum allowed sigma is 5*sigma diad_prom (float): Default = 100 Peak amplitude for of the diad. Suggest users obtain from the estimated peak parameter dataframe HB_prom (float): Default = 20 Peak amplitude for HB. Suggest users obtain from the estimated peak parameter dataframe HB_sigma_min_allowance (float): Default = 0.05 Means HB sigma cant be less than 0.05* sigma of first fitting peak (diad). HB_sigma_max_allowance (float): Default = 3: Means HB sigma cant exceed 3* sigma of first fitting peak (diad) HB_amp_min_allowance (float): Default =0.01 Means HB amplitude cant be less than 0.01*amplitude of first fitted peak (diad). HB_amp_max_allowance (float): Default = 1 Means HB amplitude cant be more than 1*amplitude of first fitted peak (diad). x_range_baseline (float): Default 75. Means that x axis of baseline shows diad position +- 75 x units. y_range_baseline (float): Y-axis range for baseline display. Default 100. Shows a y axis scale that is 100 y units above the minimum baseline value, and 100 units above the maximum value dpi (float): Default 200 Figure resolution in dots per inch. Default 200. x_range_residual (float): Default 20. Shows x values +-20 either side of the diad peak position in the residual plot. return_other_params (bool): Return non-fitted parameters if True. Default False. Methods: update_par(**kwargs): Updates configuration parameters. Raises AttributeError for unknown attributes. """ # What model to use model_name: str = 'PseudoVoigtModel' fit_peaks:int = 2 # Degree of polynomial to use N_poly_bck_diad1: float =1 # Background/baseline positions lower_bck_diad1: Tuple[float, float]=(1180, 1220) upper_bck_diad1: Tuple[float, float]=(1300, 1350) # Do you need a gaussian? Set position here if so fit_gauss: Optional [bool] =False gauss_amp: Optional [float]=1000 diad_sigma: float=0.2 diad_sigma_min_allowance: float=0.2 diad_sigma_max_allowance: float=5 # Peak amplitude diad_prom: float = 100 HB_prom: float = 20 HB_sigma_min_allowance=0.05 HB_sigma_max_allowance=3 HB_amp_min_allowance=0.01 HB_amp_max_allowance=1 # How much to show on x anx y axis of figure showing background x_range_baseline: float=75 y_range_baseline: float=100 #Do you want to save the figure? dpi: float = 200 x_range_residual: float=20 # Do you want to return other parameters? return_other_params: bool =False
[docs] def update_par(self, **kwargs): for key, value in kwargs.items(): if hasattr(self, key): setattr(self, key, value) else: raise AttributeError(f"'diad1_fit_config' object has no attribute '{key}'")
[docs] @dataclass class diad2_fit_config: """ Configuration class for fitting Diad 2 This class stores configuration parameters for fitting Diad 2 and associated peaks. The class allows for easy customization of the fitting process. Attributes: model_name (str): Default 'PseudoVoigtModel'. Model name for peak fitting. Choose from 'PseudoVoigtModel', 'VoigtModel', 'GaussianModel', 'LorentzianModel', etc. fit_peaks (int): Default 2 Number of peaks to fit. 3 = HB + Diad + C13, 2 = Diad + HB, 1 = Diad. N_poly_bck_diad2 (float): Default = 1 Degree of polynomial for background subtraction. Default 1. lower_bck_diad2 (Tuple[float, float]): Lower boundary for background fitting in cm-1. Default (1300, 1360) upper_bck_diad2 (Tuple[float, float]): Upper boundary for background fitting in cm-1 Default (1440, 1470) fit_gauss (Optional[bool]): Default True Fit a Gaussian peak if True. Helpful for very elevated spectra gauss_amp (Optional[float]): Default = 1000 Gaussian peak amplitude if `fit_gauss` is True. Default 1000. We find 2X HB intensity is a good guess on many instruments diad_sigma (float): Default = 0.2 Sigma of diad peak. diad_sigma_min_allowance (float): Default = 0.2 Tolerance on entered sigma. Means minimum allowed sigma is 0.2*sigma diad_sigma_max_allowance (float): Default = 5 Tolerance on entered sigma. Means minimum allowed sigma is 5*sigma diad_prom (float): Default = 100 Peak amplitude for of the diad. Suggest users obtain from the estimated peak parameter dataframe HB_prom (float): Default = 20 Peak amplitude for HB. Suggest users obtain from the estimated peak parameter dataframe C13_prom (float): Default = 10 Peak amplitude for C13. Suggest users obtain from the estimated peak parameter dataframe HB_sigma_min_allowance (float): Default = 0.05 Means HB sigma cant be less than 0.05* sigma of first fitting peak (diad). HB_sigma_max_allowance (float): Default = 3: Means HB sigma cant exceed 3* sigma of first fitting peak (diad) HB_amp_min_allowance (float): Default =0.01 Means HB amplitude cant be less than 0.01*amplitude of first fitted peak (diad). HB_amp_max_allowance (float): Default = 1 Means HB amplitude cant be more than 1*amplitude of first fitted peak (diad). x_range_baseline (float): Default 75. Means that x axis of baseline shows diad position +- 75 x units. y_range_baseline (float): Y-axis range for baseline display. Default 100. Shows a y axis scale that is 100 y units above the minimum baseline value, and 100 units above the maximum value dpi (float): Default 200 Figure resolution in dots per inch. Default 200. x_range_residual (float): Default 20. Shows x values +-20 either side of the diad peak position in the residual plot. return_other_params (bool): Return non-fitted parameters if True. Default False. Methods: update_par(**kwargs): Updates configuration parameters. Raises AttributeError for unknown attributes. """ # Do you need a gaussian? Set position here if so # What model to use model_name: str = 'PseudoVoigtModel' fit_peaks:int = 3 # Degree of polynomial to use N_poly_bck_diad2: float =1 # Threshold intesnity threshold_intensity=50 # Background/baseline positions lower_bck_diad2: Tuple[float, float]=(1300, 1360) upper_bck_diad2: Tuple[float, float]=(1440, 1470) fit_gauss: Optional [bool] =False gauss_amp: Optional [float]=1000 diad_sigma: float=0.2 diad_sigma_min_allowance: float=0.2 diad_sigma_max_allowance: float=5 # Peak amplitude diad_prom: float = 100 HB_prom: float = 20 C13_prom: float=10 HB_sigma_min_allowance=0.05 HB_sigma_max_allowance=3 HB_amp_min_allowance=0.01 HB_amp_max_allowance=1 # How much to show on x anx y axis of figure showing background with all peaks imposed x_range_baseline: float=75 y_range_baseline: float=100 #Do you want to save the figure? plot_figure: bool = True dpi: float = 200 x_range_residual: float=20 # Do you want to return other parameters? return_other_params: bool =False
[docs] def update_par(self, **kwargs): for key, value in kwargs.items(): if hasattr(self, key): setattr(self, key, value) else: raise AttributeError(f"'diad2_fit_config' object has no attribute '{key}'")
## Testing generic model
[docs] def fit_gaussian_voigt_generic_diad(config1, *, diad1=False, diad2=False, path=None, filename=None, xdat=None, ydat=None, span=None, plot_figure=True, dpi=200, Diad_pos=None, HB_pos=None, C13_pos=None, fit_peaks=None, block_print=True, py_baseline=None, minimise='weighted_least_squares'): """ This function fits a voigt/Pseudovoigt curves to background subtracted data for your diads +- HBs+-C13 peaks. It can also fit a second Gaussian background iteratively with these peaks Parameters ----------- config1: fit configuration file, from diad2_fit_config or diad1_fit_config data classes. Detailed descriptions of things in this file are found in docs for fit_diad_2_w_bck, and fit_diad_1_w_bck diad1: bool True if fitting Diad1, False if Diad2 diad2: bool True if fitting Diad2, False if Diad1 path: str Path to spectra folder. Used to make a folder inside this folder to store peak fit images in filename: str filename of specific diad being fitted xdat: np.array x coordinates of the spectra ydat: np.array y coordinates of the spectra span: list (e.g. [1100, 1300]) Comes from remove_diad_baseline function. Sets min and max x coordinate of new linspace for plotting the overall peak fit over the data plot_figure: bool if True, plots figure from this function specifically. Even if False, if called through fit_diad_2_w_bck or fit_diad_1_w_bck, get a figure from those functions with relevant data on dpi: int dpi of saved figure Diad_pos, HB_pos, C13_pos: float Initial guess of Diad, HB, C13 pos fit_peaks: int Number of peaks to fit. 1= just diad, 2 = diad + HB, 3 = diad+ HB +C13 block_print: bool If true, prints a few status updates as it goes to help debug py_baseline: the baseline from the diadbaseline function minimise='weighted_least_squares' or 'least squares' uses a weighted least squares cost function. Prior to 1.0.19, least squares was default. Returns ------------------- result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim, config1.fit_gauss, residual_diad_coords, ydat_inrange, xdat_inrange result: lmfit.model.ModelResult (model form lmfit) df_out: dataframe of peak fit parameters y_best_fit: np.array of best peak fit y parameters x_lin: np.array of x coordinates corresponding to y_best_fit components: y coordinates of the different components of the peak fit. xdat: input xdat ydat: input ydat ax1_xlim, ax2_xlim: xlim for plot in final figure in fit_w_bck for different fit components residual_diad_coordinates: np.array of y coordinate of residual plot ydat_inrange: np.array of data within 'span' xdat_inrange:np.array of data within 'span' """ # Calculate the amplitude from the sigma and the prominence calc_diad_amplitude=((config1.diad_sigma)*(config1.diad_prom))/0.3939 calc_HB_amplitude=((config1.diad_sigma)*(config1.HB_prom))/0.3939 # If ask for Gaussian, but fit peaks is 1, remove gaussian if fit_peaks==1 and config1.fit_gauss is True: config1.fit_gauss=False if diad2 is True and fit_peaks==3: calc_C13_amplitude=(0.5*(config1.diad_sigma)*(config1.C13_prom))/0.3939 # Gets overridden if you have triggered any of the warnings refit=False refit_param='Flagged Warnings:' spec_res=np.abs(xdat[1]-xdat[0]) initial_guess=Diad_pos HB_initial_guess=HB_pos C13_initial_guess=C13_pos model_ini = globals()[config1.model_name]() # Create initial peak params # Set peak position to 2 spectral res units either side of the initial guess made above params_ini = model_ini.make_params(center=initial_guess, max=initial_guess+spec_res*5, min=initial_guess-spec_res*5) # Set the amplitude to within X0.1 or 10X of the guessed amplitude params_ini['amplitude'].set(calc_diad_amplitude, min=calc_diad_amplitude/5, max=calc_diad_amplitude*5) # Set sigma to be the entered value, then params_ini['sigma'].set(config1.diad_sigma, min=config1.diad_sigma*config1.diad_sigma_min_allowance, max=config1.diad_sigma*config1.diad_sigma_max_allowance) # Now lets tweak an initial model with just 1 peak, this really helps with the fit init_ini = model_ini.eval(params_ini, x=xdat) result_ini = model_ini.fit(ydat, params_ini, x=xdat) comps_ini = result_ini.eval_components() Center_ini=result_ini.params.get('center') Amplitude_ini=result_ini.params.get('amplitude') sigma_ini=result_ini.params.get('sigma') fwhm_ini=result_ini.params.get('fwhm') # For relatively weak peaks, you wont want a gaussian background if config1.fit_gauss is False: # If there is 1 peak, e.g. if have a Nan for hotband if fit_peaks==1: model_F = globals()[config1.model_name](prefix='lz1_') pars1 = model_F.make_params() pars1['lz1_'+ 'amplitude'].set(calc_diad_amplitude, min=0, max=calc_diad_amplitude*10) pars1['lz1_'+ 'center'].set(Center_ini, min=Center_ini-5*spec_res, max=Center_ini+5*spec_res) pars1['lz1_'+ 'sigma'].set(config1.diad_sigma, min=config1.diad_sigma*config1.diad_sigma_min_allowance, max=config1.diad_sigma*config1.diad_sigma_max_allowance) params=pars1 # If there is more than one peak else: # Previous versions had a constant for PV and P4 but not Voigt. Now removed for all model1 = globals()[config1.model_name](prefix='lz1_') pars1 = model1.make_params() pars1['lz1_'+ 'amplitude'].set(calc_diad_amplitude, min=0, max=calc_diad_amplitude*10) pars1['lz1_'+ 'center'].set(Center_ini, min=Center_ini-3*spec_res, max=Center_ini+3*spec_res) pars1['lz1_'+ 'sigma'].set(config1.diad_sigma, min=config1.diad_sigma*config1.diad_sigma_min_allowance, max=config1.diad_sigma*config1.diad_sigma_max_allowance) if fit_peaks>1: model_F=model1 params=pars1 if fit_peaks==2: # Second wee peak prefix='lz2_' peak = globals()[config1.model_name](prefix='lz2_') pars = peak.make_params() pars[prefix + 'center'].set(HB_initial_guess, min=HB_initial_guess-2*spec_res, max=HB_initial_guess+2) pars[prefix + 'amplitude'].set(calc_HB_amplitude, min=Amplitude_ini*config1.HB_amp_min_allowance, max=Amplitude_ini*config1.HB_amp_max_allowance) pars[prefix+ 'sigma'].set(sigma_ini, min=sigma_ini*config1.HB_sigma_min_allowance, max=sigma_ini*config1.HB_sigma_max_allowance) model_F=model1+peak # updating pars 1 with values in pars pars1.update(pars) params=pars1 # Attempt at stabilizing peak fit result = model_F.fit(ydat, pars1, x=xdat) result.params['lz2_center'].vary = False result.params['lz2_amplitude'].vary = False result.params['lz2_sigma'].vary = False final_result = model_F.fit(ydat, result.params, x=xdat) params.update(final_result.params) params.update(final_result.params) result=final_result if fit_peaks==3: if block_print is False: print('Trying to iteratively fit 3 peaks') peak_pos_left=np.array([HB_initial_guess, C13_initial_guess]) for i, cen in enumerate(peak_pos_left): if i==0: # This is the hotband peak, pars = add_peak(prefix='lz%d_' % (i+2), center=cen, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini, min_sigma=sigma_ini*config1.HB_sigma_min_allowance, max_sigma=sigma_ini*config1.HB_sigma_max_allowance, amplitude=calc_HB_amplitude, min_amplitude=Amplitude_ini*config1.HB_amp_min_allowance, max_amplitude=Amplitude_ini*config1.HB_amp_max_allowance, model_name=config1.model_name) model = peak+model1 params.update(pars) if i==1: # This is c13 peak, pars = add_peak(prefix='lz%d_' % (i+2), center=cen, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini/5, min_sigma=sigma_ini/20, max_sigma=sigma_ini/2, amplitude=calc_C13_amplitude, min_amplitude=calc_C13_amplitude*(0.5*config1.HB_amp_min_allowance), max_amplitude=calc_C13_amplitude*2*(config1.HB_amp_max_allowance), model_name=config1.model_name) model = peak+model params.update(pars) model_F=model # Attempt at stabilizing peak fit result = model_F.fit(ydat, pars1, x=xdat) result.params['lz2_center'].vary = False result.params['lz2_amplitude'].vary = False result.params['lz2_sigma'].vary = False result.params['lz3_center'].vary = False result.params['lz3_amplitude'].vary = False result.params['lz3_sigma'].vary = False final_result = model_F.fit(ydat, result.params, x=xdat) params.update(final_result.params) params.update(final_result.params) result=final_result # Same, but also with a Gaussian Background if config1.fit_gauss is not False: # making the gaussian model model1 = GaussianModel(prefix='bkg_') params = model1.make_params() params['bkg_'+'amplitude'].set(config1.gauss_amp, min=config1.gauss_amp/100, max=config1.gauss_amp*10) params['bkg_'+'sigma'].set(sigma_ini*15, min=sigma_ini*10, max=sigma_ini*25) params['bkg_'+'center'].set(initial_guess, min=initial_guess-7, max=initial_guess+7) if fit_peaks==2: peak_pos_voigt=np.array([initial_guess, HB_initial_guess]) for i, cen in enumerate(peak_pos_voigt): if i==0: # This is the Diad peak, pars = add_peak(prefix='lz%d_' % (i+1), center=Center_ini, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=config1.diad_sigma, max_sigma=config1.diad_sigma*config1.diad_sigma_max_allowance, min_sigma=config1.diad_sigma*config1.diad_sigma_min_allowance, amplitude=calc_diad_amplitude, min_amplitude=0, max_amplitude=10*calc_diad_amplitude, model_name=config1.model_name) model = peak+model1 params.update(pars) if i==1: # This is the hotband peak, pars = add_peak(prefix='lz%d_' % (i+1), center=cen, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini, min_sigma=sigma_ini*config1.HB_sigma_min_allowance, max_sigma=sigma_ini*config1.HB_sigma_max_allowance, amplitude=calc_HB_amplitude, min_amplitude=Amplitude_ini*config1.HB_amp_min_allowance, max_amplitude=Amplitude_ini*config1.HB_amp_max_allowance, model_name=config1.model_name) model2 = peak+model params.update(pars) # Attempt at stabilizing peak fit # updating pars 1 with values in pars result = model2.fit(ydat, params, x=xdat) # Set the parameters of 'lz1' to be fixed result.params['lz2_center'].vary = False result.params['lz2_amplitude'].vary = False result.params['lz2_sigma'].vary = False result.params['bkg_center'].vary = False result.params['bkg_amplitude'].vary = False result.params['bkg_sigma'].vary = False # Perform fitting using the entire model (model2) with the stabilized 'lz1' parameters final_result = model2.fit(ydat, result.params, x=xdat) center_error = final_result.params['lz1_center'].stderr params.update(final_result.params) result=final_result model_F=model2 if fit_peaks==3: peak_pos_voigt=np.array([initial_guess, HB_initial_guess, C13_initial_guess]) for i, cen in enumerate(peak_pos_voigt): if i==0: # This is the Diad peak, pars = add_peak(prefix='lz%d_' % (i+1), center=Center_ini, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=config1.diad_sigma, max_sigma=config1.diad_sigma*config1.diad_sigma_max_allowance, min_sigma=config1.diad_sigma*config1.diad_sigma_min_allowance, amplitude=calc_diad_amplitude, min_amplitude=0, max_amplitude=10*calc_diad_amplitude, model_name=config1.model_name) model = peak+model1 params.update(pars) if i==1: # This is the hotband peak, pars = add_peak(prefix='lz%d_' % (i+1), center=cen, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini, min_sigma=sigma_ini*config1.HB_sigma_min_allowance, max_sigma=sigma_ini*config1.HB_sigma_max_allowance, amplitude=calc_HB_amplitude, min_amplitude=Amplitude_ini*config1.HB_amp_min_allowance, max_amplitude=Amplitude_ini*config1.HB_amp_max_allowance, model_name=config1.model_name) model2 = peak+model params.update(pars) if i==2: # This is c13 peak, pars = add_peak(prefix='lz%d_' % (i+1), center=cen, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini/5, min_sigma=sigma_ini/20, max_sigma=sigma_ini/2, amplitude=calc_C13_amplitude, min_amplitude=calc_C13_amplitude*(0.5*config1.HB_amp_min_allowance), max_amplitude=calc_C13_amplitude*2*(config1.HB_amp_max_allowance), model_name=config1.model_name) model3 = peak+model2 params.update(pars) result = model3.fit(ydat, params, x=xdat) # Set the parameters of 'lz1' to be fixed result.params['lz2_center'].vary = False result.params['lz2_amplitude'].vary = False result.params['lz2_sigma'].vary = False result.params['lz3_center'].vary = False result.params['lz3_amplitude'].vary = False result.params['lz3_sigma'].vary = False result.params['bkg_center'].vary = False result.params['bkg_amplitude'].vary = False result.params['bkg_sigma'].vary = False # Perform fitting using the entire model (model2) with the stabilized 'lz1' parameters final_result = model3.fit(ydat, result.params, x=xdat) center_error = final_result.params['lz1_center'].stderr result=final_result model_F=model3 params.update(final_result.params) if minimise=='least_squares': print('using least squares') # all points given equal weight - used by default # until Diadfit 1.0.18 init = model_F.eval(params, x=xdat) result = model_F.fit(ydat, params, x=xdat) comps = result.eval_components() elif minimise=='weighted_least_squares': print('using weighted least squares') # Operate on total acounts, so add back in baseline total_counts_obs = ydat + py_baseline # Calculate weights (1/sigma) weights = 1.0 / np.sqrt(np.maximum(total_counts_obs, 1.0)) # 4. Final fit # We use scale_covar=True to let lmfit scale the errors based on the # fit quality (the residuals), recomended if we dont know the absolute size of the errors/weights and we dont because there could easily be gain on the detector, other noise sources etc result = model_F.fit(ydat, params, x=xdat, weights=weights, scale_covar=True) else: raise ValueError( f"Invalid minimise option: '{minimise}'. " "Please specify minimise='least_squares' or " "minimise='weighted_least_squares'." ) # print(f"Fityk-style Weighted RedChi: {result.redchi:.4f}") # print(f"Error: {result.params['lz1_center'].stderr:.5f}") # # print('Weighting accounts for 5 averaged acquisitions and fitted baseline.') # print(f"Reduced Chi-Square 2: {result.redchi}") # 6. Evaluate components for plotting comps = result.eval_components() reduced_chi_squared = np.sqrt(result.redchi) # Other things #print(result.best_values) # Get first peak center Peak1_Cent=result.best_values.get('lz1_center') Peak1_Int=result.best_values.get('lz1_amplitude') Peak1_Sigma=result.best_values.get('lz1_sigma') Peak1_gamma=result.best_values.get('lz1_gamma') Peak1_Prop_Lor=result.best_values.get('lz1_fraction') Peak1_fwhm=result.params['lz1_fwhm'].value error_pk2 = result.params['lz1_center'].stderr x_lin=np.linspace(span[0], span[1], 2000) y_best_fit=result.eval(x=x_lin) components=result.eval_components(x=x_lin) if config1.fit_gauss is not False: Gauss_cent=result.best_values.get('bkg_center') Gauss_amp=result.best_values.get('bkg_amplitude') Gauss_sigma=result.best_values.get('bkg_sigma') max_Gauss=np.max(components.get('bkg_')) max_voigt=np.max(components.get('lz1_')) if max_Gauss>max_voigt/5: refit_param=str(refit_param)+ ' G_higher_than_diad' # if Gauss_sigma>=(config1.gauss_sigma*10-0.1): # refit=True # refit_param=str(refit_param)+' G_input_TooLowSigma' # if Gauss_sigma<=(config1.gauss_sigma/10+0.1): # refit=True # refit_param=str(refit_param)+ ' G_input_TooHighSigma' # # if Gauss_amp>=(config1.gauss_amp*10-0.1): # refit=True # refit_param=str(refit_param)+' G_input_TooLowAmp' # if Gauss_amp<=(config1.gauss_sigma+0.1): # refit=True # refit_param=str(refit_param)+' G_Amp_too_high' if Gauss_cent<=(config1.fit_gauss-30+0.5): refit=True refit_param=str(refit_param)+' G_InputTooLowCent' if Gauss_cent>=(initial_guess+30-0.5): refit=True refit_param=str(refit_param)+' G_InputTooHighCent' # print('fwhm gauss') # print(result.best_values) # # Work out what peak is what if diad2 is True: if fit_peaks==1: Peak2_Cent=None Peak3_Cent=None ax1_xlim=[Center_ini-15, Center_ini+15] ax2_xlim=[Center_ini-15, Center_ini+15] if fit_peaks==2: Peak2_Cent=result.best_values.get('lz2_center') Peak2_Int=result.best_values.get('lz2_amplitude') Peak2_Sigma=result.best_values.get('lz2_sigma') Peak3_Cent=None ax1_xlim=[Center_ini-15, Center_ini+30] ax2_xlim=[Center_ini-15, Center_ini+30] if fit_peaks==3: Peak2_Cent=result.best_values.get('lz2_center') Peak2_Int=result.best_values.get('lz2_amplitude') Peak2_Sigma=result.best_values.get('lz2_sigma') Peak3_Cent=result.best_values.get('lz3_center') Peak3_Int=result.best_values.get('lz3_amplitude') Peak3_Sigma=result.best_values.get('lz3_sigma') ax1_xlim=[Center_ini-30, Center_ini+30] ax2_xlim=[Center_ini-30, Center_ini+30] if Peak2_Cent is None: df_out=pd.DataFrame(data={'Diad2_Voigt_Cent': Peak1_Cent, 'Diad2_cent_err': error_pk2, 'Diad2_Voigt_Area': Peak1_Int, 'Diad2_Voigt_Sigma': Peak1_Sigma, 'Diad2_Voigt_Gamma': Peak1_gamma, }, index=[0]) if Peak2_Cent is not None: if Peak3_Cent is None: Peaks=np.array([Peak1_Cent, Peak2_Cent]) # Diad is lower peak Diad2_Cent=np.min(Peaks) # Hot band is upper peak' HB2_Cent=np.max(Peaks) # Allocate areas if Diad2_Cent==Peak2_Cent: Diad2_Int=Peak2_Int HB2_Int=Peak1_Int HB2_Sigma=Peak1_Sigma if Diad2_Cent==Peak1_Cent: Diad2_Int=Peak1_Int HB2_Int=Peak2_Int HB2_Sigma=Peak2_Sigma if Peak3_Cent is not None: Peaks=np.array([Peak1_Cent, Peak2_Cent, Peak3_Cent]) # C13 is lower peak C13_Cent=np.min(Peaks) # Diad is medium peak Diad2_Cent=np.median(Peaks) # Hot band is upper peak' HB2_Cent=np.max(Peaks) if Diad2_Cent==Peak2_Cent: Diad2_Int=Peak2_Int if Diad2_Cent==Peak1_Cent: Diad2_Int=Peak1_Int if Diad2_Cent==Peak3_Cent: Diad2_Int=Peak3_Int # Same for hotband if HB2_Cent==Peak2_Cent: HB2_Int=Peak2_Int HB2_Sigma=Peak2_Sigma if HB2_Cent==Peak1_Cent: HB2_Int=Peak1_Int HB2_Sigma=Peak1_Sigma if HB2_Cent==Peak3_Cent: HB2_Int=Peak3_Int HB2_Sigma=Peak2_Sigma # Same for C13 if C13_Cent==Peak2_Cent: C13_Int=Peak2_Int C13_Sigma=Peak2_Sigma if C13_Cent==Peak1_Cent: C13_Int=Peak1_Int C13_Sigma=Peak1_Sigma if C13_Cent==Peak3_Cent: C13_Int=Peak3_Int C13_Sigma=Peak3_Sigma df_out=pd.DataFrame(data={'Diad2_Voigt_Cent': Diad2_Cent, 'Diad2_cent_err': error_pk2, 'Diad2_Voigt_Area': Diad2_Int, 'Diad2_Voigt_Sigma': Peak1_Sigma, 'Diad2_Voigt_Gamma': Peak1_gamma, }, index=[0]) df_out['HB2_Cent']=HB2_Cent df_out['HB2_Area']=HB2_Int df_out['HB2_Sigma']=HB2_Sigma if Peak3_Cent is not None: df_out['C13_Cent']=C13_Cent df_out['C13_Area']=C13_Int df_out['C13_Sigma']=C13_Sigma result_diad2_origx_all=result.eval(x=xdat) # Trim to be in range #print(result_diad2_origx_all) result_diad2_origx=result_diad2_origx_all[(xdat>span[0]) & (xdat<span[1])] ydat_inrange=ydat[(xdat>span[0]) & (xdat<span[1])] xdat_inrange=xdat[(xdat>span[0]) & (xdat<span[1])] residual_diad_coords=ydat_inrange-result_diad2_origx x_cent_lin=np.linspace(df_out['Diad2_Voigt_Cent']-1, df_out['Diad2_Voigt_Cent']+1, 20000) y_cent_best_fit=result.eval(x=x_cent_lin) diad_height = np.max(y_cent_best_fit) df_out['Diad2_Combofit_Height']= diad_height df_out.insert(0, 'Diad2_Combofit_Cent', np.nanmean(x_cent_lin[y_cent_best_fit==diad_height])) residual_diad2=np.sum(((ydat_inrange-result_diad2_origx)**2)**0.5)/(len(ydat_inrange)) df_out['Diad2_Residual']=residual_diad2 df_out['Diad2_Prop_Lor']= Peak1_Prop_Lor df_out['Diad2_fwhm']=Peak1_fwhm if config1.fit_gauss is not False: df_out['Diad2_Gauss_Cent']=Gauss_cent df_out['Diad2_Gauss_Area']=Gauss_amp df_out['Diad2_Gauss_Sigma']=Gauss_sigma #Checking whether you need any flags. if error_pk2 is None: refit=True refit_param=str(refit_param)+' No Error' if Peak1_Int>=(calc_diad_amplitude*10-0.1): refit=True refit_param=str(refit_param)+' V_LowAmp' if Peak1_Sigma>=(calc_diad_amplitude*10-0.1): refit=True refit_param=str(refit_param)+' V_HighAmp' if Peak1_Sigma>=(config1.diad_sigma*config1.diad_sigma_max_allowance-0.001): refit=True refit_param=str(refit_param)+' V_Sigma_too_Low' if diad_height>(3*config1.diad_prom): refit_param=str(refit_param)+' V_Sigma_too_Low' if diad_height<(0.5*config1.diad_prom): refit_param=str(refit_param)+' V_Sigma_too_High' if Peak1_Sigma<=(config1.diad_sigma*config1.diad_sigma_min_allowance+0.001): refit=True refit_param=str(refit_param)+' V_Sigma_too_High' if diad_height<config1.threshold_intensity: refit_param=str(refit_param)+' Below threshold intensity' #if findme if plot_figure is True: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4)) ax1.plot(x_lin, y_best_fit, '-g', linewidth=2, label='best fit') ax1.plot(xdat, ydat, '.k', label='data') ax1.legend() ax1.set_xlim(ax1_xlim) ax2.set_xlim(ax2_xlim) if config1.fit_gauss is not False: ax2.plot(x_lin, components.get('bkg_'), '.c',linewidth=2, label='Gaussian bck') ax2.plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Peak1') ax2.plot(xdat, ydat, '.k') #ax2.plot(xdat, result.best_fit, '-g', label='best fit') fitspan=max(y_best_fit)-min(y_best_fit) ax2.set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5]) ax1.set_ylabel('Intensity') ax1.set_xlabel('Wavenumber') ax2.set_ylabel('Intensity') ax2.set_xlabel('Wavenumber') if fit_peaks>1: ax2.plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='Peak2') if fit_peaks>2: ax2.plot(x_lin, components.get('lz3_'), '-m', linewidth=2, label='Peak3') ax2.legend() path3=path+'/'+'diad_fit_images' if os.path.exists(path3): out='path exists' else: os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False) file=filename.rsplit('.txt', 1)[0] fig.savefig(path3+'/'+'Diad2_Fit_{}.png'.format(file), dpi=dpi) best_fit=result.best_fit df_out['Diad2_refit']=refit_param if diad1 is True: if fit_peaks==1: Peak2_Cent=None Peak3_Cent=None ax1_xlim=[Center_ini-15, Center_ini+15] ax2_xlim=[Center_ini-15, Center_ini+15] if fit_peaks==2: Peak2_Cent=result.best_values.get('lz2_center') Peak2_Int=result.best_values.get('lz2_amplitude') Peak2_Sigma=result.best_values.get('lz2_sigma') # # Checking parameters for hot bands # if Peak2_Sigma>(sigma_ini*5-0.1): # refit_param=str(refit_param)+' HB1_HighSigma' # if Peak2_Sigma<=(sigma_ini/20+0.1): # refit_param=str(refit_param)+' HB1_LowSigma' # # if Peak2_Int>(Amplitude_ini/5-0.001): # refit_param=str(refit_param)+' HB1_HighAmp' # if Peak2_Int<=(Amplitude_ini/100+0.001): # refit_param=str(refit_param)+' HB1_LowAmp' Peak3_Cent=None ax1_xlim=[Center_ini-30, Center_ini+15] ax2_xlim=[Center_ini-30, Center_ini+15] if Peak2_Cent is None: df_out=pd.DataFrame(data={'Diad1_Voigt_Cent': Peak1_Cent, 'Diad1_cent_err': error_pk2, 'Diad1_Voigt_Area': Peak1_Int, 'Diad1_Voigt_Sigma': Peak1_Sigma, 'Diad1_Voigt_Gamma': Peak1_gamma, }, index=[0]) if Peak2_Cent is not None: df_out=pd.DataFrame(data={'Diad1_Voigt_Cent': Peak1_Cent, 'Diad1_cent_err': error_pk2, 'Diad1_Voigt_Area': Peak1_Int, 'Diad1_Voigt_Sigma': Peak1_Sigma, 'Diad1_Voigt_Gamma': Peak1_gamma, }, index=[0]) df_out['HB1_Cent']=Peak2_Cent df_out['HB1_Area']=Peak2_Int df_out['HB1_Sigma']=Peak2_Sigma result_diad1_origx_all=result.eval(x=xdat) # Trim to be in range #print(result_diad2_origx_all) result_diad1_origx=result_diad1_origx_all[(xdat>span[0]) & (xdat<span[1])] ydat_inrange=ydat[(xdat>span[0]) & (xdat<span[1])] xdat_inrange=xdat[(xdat>span[0]) & (xdat<span[1])] residual_diad_coords=ydat_inrange-result_diad1_origx x_cent_lin=np.linspace(df_out['Diad1_Voigt_Cent']-1, df_out['Diad1_Voigt_Cent']+1, 20000) y_cent_best_fit=result.eval(x=x_cent_lin) diad_height = np.max(y_cent_best_fit) df_out['Diad1_Combofit_Height']= diad_height df_out.insert(0, 'Diad1_Combofit_Cent', np.nanmean(x_cent_lin[y_cent_best_fit==diad_height])) residual_diad1=np.sum(((ydat_inrange-result_diad1_origx)**2)**0.5)/(len(ydat_inrange)) df_out['Diad1_Residual']=residual_diad1 df_out['Diad1_Prop_Lor']= Peak1_Prop_Lor df_out['Diad1_fwhm']=Peak1_fwhm if config1.fit_gauss is not False: df_out['Diad1_Gauss_Cent']=Gauss_cent df_out['Diad1_Gauss_Area']=Gauss_amp df_out['Diad1_Gauss_Sigma']=Gauss_sigma if plot_figure is True: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4)) ax1.plot(x_lin, y_best_fit, '-g', linewidth=2, label='best fit') ax1.plot(xdat, ydat, '.k', label='data') ax1.legend() ax1.set_xlim(ax1_xlim) ax2.set_xlim(ax2_xlim) if config1.fit_gauss is not False: ax2.plot(x_lin, components.get('bkg_'), '.c',linewidth=2, label='Gaussian bck') ax2.plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Peak1') ax2.plot(xdat, ydat, '.k') #ax2.plot(xdat, result.best_fit, '-g', label='best fit') fitspan=max(y_best_fit)-min(y_best_fit) ax2.set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5]) ax1.set_ylabel('Intensity') ax1.set_xlabel('Wavenumber') ax2.set_ylabel('Intensity') ax2.set_xlabel('Wavenumber') if fit_peaks>1: ax2.plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='Peak2') if fit_peaks>2: ax2.plot(x_lin, components.get('lz3_'), '-m', linewidth=2, label='Peak3') # if len(peak_pos_voigt)>1: # lowerpeak=np.min([Peak1_Cent, Peak2_Cent]) # upperpeak=np.max([Peak1_Cent, Peak2_Cent]) # ax1.set_xlim([lowerpeak-15, upperpeak+15]) # if len(peak_pos_voigt)>1: # ax2.set_xlim([lowerpeak-20, upperpeak+20]) ax2.legend() path3=path+'/'+'diad_fit_images' if os.path.exists(path3): out='path exists' else: os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False) file=filename.rsplit('.txt', 1)[0] fig.savefig(path3+'/'+'Diad1_Fit_{}.png'.format(file), dpi=dpi) best_fit=result.best_fit df_out['Diad1_refit']=refit_param # Final check - that Gaussian isnt anywhere near the height of the diad df_out['reduced_chi_squared']=reduced_chi_squared df_out = df_out.fillna(0) df_out = df_out.infer_objects(copy=False) return result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim, residual_diad_coords, ydat_inrange, xdat_inrange
[docs] def fit_diad_2_w_bck(*, config1: diad2_fit_config=diad2_fit_config(), config2: diad_id_config=diad_id_config(), path=None, filename=None, peak_pos_voigt=None,filetype=None, diad_array=None, plot_figure=True, close_figure=False, Diad_pos=None, HB_pos=None, C13_pos=None, minimise='weighted_least_squares'): """ This function fits the background (using the function remove_diad_baseline) and then fits the peaks using fit_gaussian_voigt_generic_diad() It then checks if any parameters are right at the permitted edge (meaning fitting didnt converge), and tries fitting if so. Then it makes a plot of the various parts of the fitting procedure, and returns a dataframe of the fit parameters. Parameters ------------------ diad_id_config() Dataclass of parameters used to look for peaks. Uses any excluded ranges from this. diad2_fit_config() Dataclass of parameters used to find peaks. Use help on this dataclass to get options. path: str Folder user wishes to read data from filename: str Specific name of file being used. filetype: str Identifies type of file. choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' plot_figure: bool if True, saves figure close_fig: bool if True, displays figure in Notebook. If False, closes it (you can still find it saved) Diad_pos: float Estimate of diad peak position HB_pos: float Estimate of HB peak position C13_pos: float Estimate of C13 peak position minimise: str 'weighted_least_squares' vs 'least_squares' Returns ------------ if return_other_params is False (default) returns a dataframe of peak parameters if True, also returns: result: fit parameters y_best_fit: best fit to all curves in y x_lin: linspace of best fit coordinates in x. """ if 'CRR' in filename: filetype='headless_txt' model=config1.model_name if model not in allowed_models: raise ValueError(f"Unsupported model: {model}. Supported models are: {', '.join(allowed_models)}") # Check number of peaks makes sense fit_peaks=config1.fit_peaks if diad_array is None: Diad_df=get_data(path=path, filename=filename, filetype=filetype) Diad=np.array(Diad_df) else: Diad=diad_array if fit_peaks==2: if np.isnan(HB_pos)==True or config1.HB_prom<0: fit_peaks=1 fit_gauss=False if fit_peaks==3: # This tests if HB is Nan and C13 is Nan if np.isnan(HB_pos)==True and np.isnan(C13_pos)==True: fit_peaks=1 fit_gauss=False # If only HB is Nan, lets instead allocate a HB positoin. elif np.isnan(HB_pos): HB_region=[Diad_pos+19, Diad_pos+23] filter=(Diad[:, 0]>=HB_region[0])& (Diad[:,0]<=HB_region[1]) TrimDiad=Diad[filter] filter2=(Diad[:, 0]>=(HB_region[0]-5))& (Diad[:,0]<=(HB_region[1]+5)) TrimDiad2=Diad[filter2] maxy=np.max(TrimDiad[:, 1]) maxx=TrimDiad[TrimDiad[:, 1]==maxy] HB_pos=float(maxx[:, 0][0]) miny=np.min(TrimDiad2[:, 1]) config1.HB_prom=maxy-miny config1.gauss_amp=2*(maxy-miny) # If no C13, reduce fit peaks to 2. elif np.isnan(C13_pos)==True or config1.C13_prom<10: fit_peaks=2 # First, we feed data into the remove baseline function, which returns corrected data y_corr_diad2, Py_base_diad2, x_diad2, Diad_short_diad2, Py_base_diad2, Pf_baseline_diad2, Baseline_ysub_diad2, Baseline_x_diad2, Baseline_diad2, span_diad2=remove_diad_baseline( path=path, filename=filename, filetype=filetype, exclude_range1=config2.exclude_range1, exclude_range2=config2.exclude_range2, N_poly=config1.N_poly_bck_diad2, lower_bck=config1.lower_bck_diad2, upper_bck=config1.upper_bck_diad2, plot_figure=False, diad_array=diad_array) # Then, we feed this baseline-corrected data into the combined gaussian-voigt peak fitting function result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange, xdat_inrange=fit_gaussian_voigt_generic_diad(config1, diad2=True, path=path, filename=filename, xdat=x_diad2, ydat=y_corr_diad2, span=span_diad2, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, C13_pos=C13_pos, fit_peaks=fit_peaks, py_baseline=Py_base_diad2, minimise=minimise) if any(df_out['Diad2_refit'].str.contains('Below threshold intensity')): print('below your threshold intensity, we have filled with nans') else: # Try Refitting once if str(df_out['Diad2_refit'].iloc[0])!='Flagged Warnings:': print('refit attempt 1') factor=2 import copy config_tweaked=config1 config_tweaked=copy.copy(config1) if any(df_out['Diad2_refit'].str.contains(' No Error')): if fit_peaks>1: fit_peaks=fit_peaks-1 # if any(df_out['Diad2_refit'].str.contains('V_HighAmp')): # config_tweaked.diad_amplitude=calc_diad_amplitude/10 if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_Low')): config_tweaked.diad_sigma=config1.diad_sigma*factor if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_High')): config_tweaked.diad_sigma=config1.diad_sigma/factor # Gaussian fit bits if any(df_out['Diad2_refit'].str.contains('G_input_TooHighSigma')): config_tweaked.gauss_sigma=config1.gauss_sigma/factor if any(df_out['Diad2_refit'].str.contains('G_input_TooLowSigma')): config_tweaked.gauss_sigma=config1.gauss_sigma*factor if any(df_out['Diad2_refit'].str.contains('G_input_TooLowAmp')): config_tweaked.gauss_amplitude=config1.gauss_amp*factor if any(df_out['Diad2_refit'].str.contains('G_Amp_too_high')): config_tweaked.gauss_amplitude=config1.gauss_amp/factor if any(df_out['Diad2_refit'].str.contains('G_higher_than_diad')): config_tweaked.gauss_amplitude=config1.gauss_amp/factor #config_tweaked.gauss_sigma=config1.gauss_sigma*factor # Hot band fit bits # if any(df_out['Diad2_refit'].str.contains('HB2_LowAmp')): # config_tweaked.HB_amplitude=calc_HB_amplitude/2 # if any(df_out['Diad2_refit'].str.contains('HB2_HighAmp')): # config_tweaked.HB_amplitude=calc_HB_amplitude*2 result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange, xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked, diad2=True, path=path, filename=filename, xdat=x_diad2, ydat=y_corr_diad2, span=span_diad2, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, C13_pos=C13_pos, fit_peaks=fit_peaks, py_baseline=Py_base_diad2, minimise=minimise) i=2 while str(df_out['Diad2_refit'].iloc[0])!='Flagged Warnings:': print('refit attempt ='+str(i) + ', '+str(df_out['Diad2_refit'].iloc[0])) print(df_out['Diad2_refit'].iloc[0]) config_tweaked2=copy.copy(config_tweaked) factor2=factor*2 # # if any(df_out['Diad2_refit'].str.contains('V_LowAmp')): # config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude*10 # if any(df_out['Diad2_refit'].str.contains('V_HighAmp')): # config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude/10 if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_Low')): config_tweaked2.diad_sigma=config_tweaked.diad_sigma*factor2 if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_High')): config_tweaked2.diad_sigma=config_tweaked.diad_sigma/factor2 # Gaussian fit bits if any(df_out['Diad2_refit'].str.contains('G_input_TooHighSigma')): config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma/factor2 if any(df_out['Diad2_refit'].str.contains('G_input_TooLowSigma')): config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2 if any(df_out['Diad2_refit'].str.contains('G_input_TooLowAmp')): config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp*factor2 if any(df_out['Diad2_refit'].str.contains('G_Amp_too_high')): config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2 if any(df_out['Diad2_refit'].str.contains('G_higher_than_diad')): config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2 #config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2 # # Hot bands bits # if any(df_out['Diad2_refit'].str.contains('HB2_LowAmp')): # config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude/2 # if any(df_out['Diad2_refit'].str.contains('HB2_HighAmp')): # config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude*2 result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange, xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked2, diad2=True, path=path, filename=filename, xdat=x_diad2, ydat=y_corr_diad2, span=span_diad2, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, C13_pos=C13_pos, fit_peaks=fit_peaks, py_baseline=Py_base_diad2, minimise=minimise) i=i+1 if i>5: print('Got to 5 iteratoins and still couldnt adjust the fit parameters') break # get a best fit to the baseline using a linspace from the peak fitting ybase_xlin=Pf_baseline_diad2(x_lin) # We extract the full spectra to plot at the end, and convert to a dataframe #Spectra_df=get_data(path=path, filename=filename, filetype=filetype) Spectra=Diad # Make nice figure if plot_figure is True: figure_mosaic=""" XY AB CD EE """ fig,axes=plt.subplot_mosaic(mosaic=figure_mosaic, figsize=(12, 16)) if df_out['Diad2_refit'] is not False: fig.suptitle('Diad 2, file= '+ str(filename) + ' \n' + str(df_out['Diad2_refit'].iloc[0]), fontsize=16, x=0.5, y=1.0) else: fig.suptitle('Diad 2, file= '+ str(filename), fontsize=16, x=0.5, y=1.0) # Background plot for real # Plot best fit on the LHS, and individual fits on the RHS at the top axes['X'].set_title('a) Background fit') axes['X'].plot(Diad[:, 0], Diad[:, 1], '-', color='grey') axes['X'].plot(Diad_short_diad2[:, 0], Diad_short_diad2[:, 1], '-r', label='Spectra') #axes['X'].plot(Baseline[:, 0], Baseline[:, 1], '-b', label='Bck points') axes['X'].plot(Baseline_diad2[:, 0], Baseline_diad2[:, 1], '.b', label='bel. Bck. pts') axes['X'].plot(Diad_short_diad2[:, 0], Py_base_diad2, '-k', label='bck. poly fit') ax1_ymin=np.min(Baseline_diad2[:, 1])-10*np.std(Baseline_diad2[:, 1]) ax1_ymax=np.max(Baseline_diad2[:, 1])+10*np.std(Baseline_diad2[:, 1]) ax1_xmin=config1.lower_bck_diad2[0]-30 ax1_xmax=config1.upper_bck_diad2[1]+30 # Adding patches rect_diad2_b1=patches.Rectangle((config1.lower_bck_diad2[0], ax1_ymin),config1.lower_bck_diad2[1]-config1.lower_bck_diad2[0],ax1_ymax-ax1_ymin, linewidth=1,edgecolor='none',facecolor='cyan', label='sel. bck. region', alpha=0.3, zorder=0) axes['X'].add_patch(rect_diad2_b1) rect_diad2_b2=patches.Rectangle((config1.upper_bck_diad2[0], ax1_ymin),config1.upper_bck_diad2[1]-config1.upper_bck_diad2[0],ax1_ymax-ax1_ymin, linewidth=1,edgecolor='none',facecolor='cyan', alpha=0.3, zorder=0) axes['X'].add_patch(rect_diad2_b2) axes['X'].set_xlim([ax1_xmin, ax1_xmax]) axes['X'].set_ylim([ax1_ymin, ax1_ymax]) axes['X'].set_ylabel('Intensity') axes['Y'].set_ylabel('Intensity') axes['Y'].set_xlabel('Wavenumber') axes['X'].legend() axes['Y'].set_title('b) Background subtracted spectra') axes['Y'].plot(x_diad2, y_corr_diad2, '-r') height_p=np.max(Diad_short_diad2[:, 1])-np.min(Diad_short_diad2[:, 1]) axes['Y'].set_ylim([np.min(y_corr_diad2), 1.2*height_p ]) axes['X'].set_xlabel('Wavenumber') axes['A'].plot(xdat, ydat, '.k', label='bck. sub. data') axes['A'].plot( x_lin ,y_best_fit, '-g', linewidth=1, label='best fit') axes['A'].legend() axes['A'].set_ylabel('Intensity') axes['A'].set_xlabel('Wavenumber') axes['A'].set_xlim(ax1_xlim) axes['A'].set_title('c) Overall Best Fit') # individual fits axes['B'].plot(xdat, ydat, '.k') # This is for if there is more than 1 peak, this is when we want to plot the best fit if fit_peaks>1: axes['B'].plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='HB2') axes['B'].plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Diad2') if config1.fit_gauss is not False: axes['B'].plot(x_lin, components.get('bkg_'), '-m', label='Gaussian bck', linewidth=2) #ax2.plot(xdat, result.best_fit, '-g', label='best fit') axes['B'].legend() fitspan=max(y_best_fit)-min(y_best_fit) axes['B'].set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5]) axes['B'].set_ylabel('Intensity') axes['B'].set_xlabel('Wavenumber') axes['B'].set_xlim(ax2_xlim) # Dashed lines so matches part D axes['B'].plot([df_out['Diad2_Voigt_Cent'], df_out['Diad2_Voigt_Cent']], [np.min(ydat), np.max(ydat)], ':b') if fit_peaks>=2: axes['B'].plot([df_out['HB2_Cent'], df_out['HB2_Cent']], [np.min(ydat), np.max(ydat)], ':r') axes['B'].set_title('c) Fit Components') # Background fit # First, set up x and y limits on axis if config1.x_range_baseline is not None: axc_xmin=df_out['Diad2_Voigt_Cent'][0]-config1.x_range_baseline axc_xmax=df_out['Diad2_Voigt_Cent'][0]+config1.x_range_baseline else: axc_xmin=config1.lower_bck_diad2[0] axc_xmax=config1.upper_bck_diad2[1] axc_ymin=np.min(Baseline_diad2[:, 1])-config1.y_range_baseline/3 axc_ymax=np.max(Baseline_diad2[:, 1])+config1.y_range_baseline rect_diad2_b1=patches.Rectangle((config1.lower_bck_diad2[0],axc_ymin),config1.lower_bck_diad2[1]-config1.lower_bck_diad2[0],axc_ymax-axc_ymin, linewidth=1,edgecolor='none',facecolor='cyan', label='bck', alpha=0.3, zorder=0) axes['C'].set_title('d) Peaks overlain on data before subtraction') axes['C'].plot(Diad_short_diad2[:, 0], Diad_short_diad2[:, 1], '.r', label='data') axes['C'].plot(Baseline_diad2[:, 0], Baseline_diad2[:, 1], '.b', label='bck') axes['C'].plot(Diad_short_diad2[:, 0], Py_base_diad2, '-k', label='Poly bck fit') axes['C'].set_ylabel('Intensity') axes['C'].set_xlabel('Wavenumber') if config1.fit_gauss is not False: axes['C'].plot(x_lin, components.get('bkg_')+ybase_xlin, '-m', label='Gaussian bck', linewidth=2) axes['C'].plot( x_lin ,y_best_fit+ybase_xlin, '-g', linewidth=2, label='Best Fit') axes['C'].plot(x_lin, components.get('lz1_')+ybase_xlin, '-b', label='Diad2', linewidth=1) if fit_peaks>1: axes['C'].plot(x_lin, components.get('lz2_')+ybase_xlin, '-r', label='HB2', linewidth=1) if fit_peaks>2: axes['C'].plot(x_lin, components.get('lz3_')+ybase_xlin, '-c', label='C13', linewidth=1) axes['C'].legend(ncol=3, loc='lower center') axes['C'].set_xlim([axc_xmin, axc_xmax]) axes['C'].set_ylim([axc_ymin, axc_ymax]) #axes['C'].plot(Diad_short[:, 0], Diad_short[:, 1], '"r', label='Data') # Residual on plot D axes['D'].set_title('f) Residuals') axes['D'].plot([df_out['Diad2_Voigt_Cent'], df_out['Diad2_Voigt_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':b') if fit_peaks>1: axes['D'].plot([df_out['HB2_Cent'], df_out['HB2_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':r') axes['D'].plot(xdat_inrange, residual_diad_coords, 'ok', mfc='c' ) axes['D'].plot(xdat_inrange, residual_diad_coords, '-c' ) axes['D'].set_ylabel('Residual') axes['D'].set_xlabel('Wavenumber') # axes['D'].set_xlim(ax1_xlim) # axes['D'].set_xlim(ax2_xlim) Local_Residual_diad2=residual_diad_coords[((xdat_inrange>(df_out['Diad2_Voigt_Cent'][0]-config1.x_range_residual)) &(xdat_inrange<df_out['Diad2_Voigt_Cent'][0]+config1.x_range_residual))] axes['D'].set_xlim([df_out['Diad2_Voigt_Cent'][0]-config1.x_range_residual, df_out['Diad2_Voigt_Cent'][0]+config1.x_range_residual]) #ax5.plot([cent_1117, cent_1117 ], [np.min(Local_Residual_1117)-10, np.max(Local_Residual_1117)+10], ':k') axes['D'].set_ylim([np.min(Local_Residual_diad2)-10, np.max(Local_Residual_diad2)+10]) # Overal spectra axes['E'].set_title('g) Summary plot of raw spectra for file = ' + filename) axes['E'].plot(Spectra[:, 0], Spectra[:, 1], '-r') axes['E'].set_ylabel('Intensity') axes['E'].set_xlabel('Wavenumber') path3=path+'/'+'diad_fit_images' if os.path.exists(path3): out='path exists' else: os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False) fig.tight_layout() file=filename.rsplit('.txt', 1)[0] fig.savefig(path3+'/'+'diad2_Fit_{}.png'.format(file), dpi=config1.dpi) if close_figure is True: plt.close(fig) # Lets calculate peak skewness here. Skew50=assess_diad2_skewness(config1=diad2_fit_config(), int_cut_off=0.5, path=path, filename=filename, filetype=filetype, skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array) Skew80=assess_diad2_skewness(config1=diad2_fit_config(), int_cut_off=0.3, path=path, filename=filename, filetype=filetype, skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array) df_out['Diad2_Asym50']=Skew50['Skewness_diad2'] df_out['Diad2_Asym70']=Skew80['Skewness_diad2'] df_out['Diad2_Yuan2017_sym_factor']=(df_out['Diad2_fwhm'])*(df_out['Diad2_Asym50']-1) df_out['Diad2_Remigi2021_BSF']=df_out['Diad2_fwhm']/df_out['Diad2_Combofit_Height'] df_out['Diad2_PDF_Model']=model df_out = df_out.rename(columns={'reduced_chi_squared': 'Diad2_reduced_chi_squared'}) if config1.return_other_params is False: return df_out else: return df_out, result, y_best_fit, x_lin
[docs] def fit_diad_1_w_bck(*, config1: diad1_fit_config=diad1_fit_config(), config2: diad_id_config=diad_id_config(), path=None, filename=None, filetype=None, plot_figure=True, close_figure=True, Diad_pos=None, HB_pos=None, diad_array=None, minimise='weighted_least_squares'): """ This function fits the background (using the function remove_diad_baseline) and then fits the peaks using fit_gaussian_voigt_generic_diad() It then checks if any parameters are right at the permitted edge (meaning fitting didnt converge), and tries fitting if so. Then it makes a plot of the various parts of the fitting procedure, and returns a dataframe of the fit parameters. Parameters ----------- diad_id_config() Dataclass of parameters used to look for peaks. Uses any excluded ranges from this. diad1_fit_config() Dataclass of parameters used to find peaks. Use help on this dataclass to get options path: str Folder user wishes to read data from filename: str Specific name of file being used. filetype: str Identifies type of file. choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' plot_figure: bool if True, saves figure Diad_pos: float Estimate of diad position HB_pos: float Estimate of HB position close_fig: bool if True, displays figure in Notebook. If False, closes it (you can still find it saved) minimise: str 'weighted_least_squares' or 'least_squares' Returns ------------ if return_other_params is False (default) returns a dataframe of peak parameters if True, also returns: result: fit parameters y_best_fit: best fit to all curves in y x_lin: linspace of best fit coordinates in x. """ if 'CRR' in filename: filetype='headless_txt' model=config1.model_name if model not in allowed_models: raise ValueError(f"Unsupported model: {model}. Supported models are: {', '.join(allowed_models)}") fit_peaks=config1.fit_peaks if fit_peaks==2: if np.isnan(HB_pos)==True or config1.HB_prom<-50: fit_peaks=1 config1.fit_gauss=False #print('Either no hb position, or prominence<-50, using 1 fit') if diad_array is None: Diad_df=get_data(path=path, filename=filename, filetype=filetype) Diad=np.array(Diad_df) else: Diad=diad_array # First, we feed data into the remove baseline function, which returns corrected data y_corr_diad1, Py_base_diad1, x_diad1, Diad_short_diad1, Py_base_diad1, Pf_baseline_diad1, Baseline_ysub_diad1, Baseline_x_diad1, Baseline_diad1, span_diad1=remove_diad_baseline( path=path, filename=filename, filetype=filetype, exclude_range1=config2.exclude_range1, exclude_range2=config2.exclude_range2, N_poly=config1.N_poly_bck_diad1, lower_bck=config1.lower_bck_diad1, upper_bck=config1.upper_bck_diad1, plot_figure=False, diad_array=diad_array) # Then, we feed this baseline-corrected data into the combined gaussian-voigt peak fitting function result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange, xdat_inrange=fit_gaussian_voigt_generic_diad(config1,diad1=True, path=path, filename=filename, xdat=x_diad1, ydat=y_corr_diad1, span=span_diad1, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos,fit_peaks=fit_peaks, py_baseline=Py_base_diad1, minimise=minimise) # if df_out['Diad1_cent_err'].iloc[0]==0: # if config1.fit_peaks>1: # config_tweaked.fit_peaks=config1.fit_peaks-1 # print('reducing peaks by 1 to try to get an error') # Try Refitting once if str(df_out['Diad1_refit'].iloc[0])!='Flagged Warnings:': print('refit attempt 1') factor=2 import copy config_tweaked=config1 config_tweaked=copy.copy(config1) if any(df_out['Diad1_refit'].str.contains(' No Error')): if config1.fit_peaks>2: config_tweaked.fit_peaks=config1.fit_peaks-1 fit_peaks=config_tweaked.fit_peaks if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_Low')): config_tweaked.diad_sigma=config1.diad_sigma*factor if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_High')): config_tweaked.diad_sigma=config1.diad_sigma/factor # Gaussian fit bits if any(df_out['Diad1_refit'].str.contains('G_input_TooHighSigma')): config_tweaked.gauss_sigma=config1.gauss_sigma/factor if any(df_out['Diad1_refit'].str.contains('G_input_TooLowSigma')): config_tweaked.gauss_sigma=config1.gauss_sigma*factor if any(df_out['Diad1_refit'].str.contains('G_input_TooLowAmp')): config_tweaked.gauss_amplitude=config1.gauss_amp*factor if any(df_out['Diad1_refit'].str.contains('G_Amp_too_high')): config_tweaked.gauss_amplitude=config1.gauss_amp/factor if any(df_out['Diad1_refit'].str.contains('G_higher_than_diad')): config_tweaked.gauss_amplitude=config1.gauss_amp/factor config_tweaked.gauss_sigma=config1.gauss_sigma*factor # # Hot band fit bits # if any(df_out['Diad1_refit'].str.contains('HB1_LowAmp')): # config_tweaked.HB_amplitude=calc_HB_amplitude/2 # if any(df_out['Diad1_refit'].str.contains('HB1_HighAmp')): # config_tweaked.HB_amplitude=calc_HB_amplitude*2 result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange, xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked, diad1=True, path=path, filename=filename, xdat=x_diad1, ydat=y_corr_diad1, span=span_diad1, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, fit_peaks=config_tweaked.fit_peaks, py_baseline=Py_base_diad1, minimise=minimise) i=2 while str(df_out['Diad1_refit'].iloc[0])!='Flagged Warnings:': print('refit attempt ='+str(i) + ', '+str(df_out['Diad1_refit'].iloc[0])) print(df_out['Diad1_refit'].iloc[0]) import copy config_tweaked2=copy.copy(config_tweaked) factor2=factor*2 # if any(df_out['Diad1_refit'].str.contains('V_LowAmp')): # config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude*10 # if any(df_out['Diad1_refit'].str.contains('V_HighAmp')): # config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude/10 if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_Low')): config_tweaked2.diad_sigma=config_tweaked.diad_sigma*factor2 if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_High')): config_tweaked2.diad_sigma=config_tweaked.diad_sigma/factor2 # Gaussian fit bits if any(df_out['Diad1_refit'].str.contains('G_input_TooHighSigma')): config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma/factor2 if any(df_out['Diad1_refit'].str.contains('G_input_TooLowSigma')): config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2 if any(df_out['Diad1_refit'].str.contains('G_input_TooLowAmp')): config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp*factor2 if any(df_out['Diad1_refit'].str.contains('G_Amp_too_high')): config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2 if any(df_out['Diad1_refit'].str.contains('G_higher_than_diad')): config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2 config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2 # Hot bands bits # if any(df_out['Diad1_refit'].str.contains('HB1_LowAmp')): # config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude/2 # if any(df_out['Diad1_refit'].str.contains('HB1_HighAmp')): # config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude*2 result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange, xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked2,diad1=True, path=path, filename=filename, xdat=x_diad1, ydat=y_corr_diad1, span=span_diad1, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, fit_peaks=config_tweaked.fit_peaks, py_baseline=Py_base_diad1, minimise=minimise) i=i+1 if i>5: print('Got to 5 iteratoins and still couldnt adjust the fit parameters') break # get a best fit to the baseline using a linspace from the peak fitting ybase_xlin=Pf_baseline_diad1(x_lin) # We extract the full spectra to plot at the end, and convert to a dataframe Spectra=Diad # Make nice figure if plot_figure is True: figure_mosaic=""" XY AB CD EE """ fig,axes=plt.subplot_mosaic(mosaic=figure_mosaic, figsize=(12, 16)) fig.suptitle('Diad 1, file= '+ str(filename) + ' \n' + str(df_out['Diad1_refit'].iloc[0]), fontsize=16, x=0.5, y=1.0) # Background plot for rea # Plot best fit on the LHS, and individual fits on the RHS at the top axes['X'].set_title('a) Background fit') axes['X'].plot(Diad[:, 0], Diad[:, 1], '-', color='grey') axes['X'].plot(Diad_short_diad1[:, 0], Diad_short_diad1[:, 1], '-r', label='Spectra') #axes['X'].plot(Baseline[:, 0], Baseline[:, 1], '-b', label='Bck points') axes['X'].plot(Baseline_diad1[:, 0], Baseline_diad1[:, 1], '.b', label='Sel. bck. pts') axes['X'].plot(Diad_short_diad1[:, 0], Py_base_diad1, '-k', label='bck. poly fit') ax1_ymin=np.min(Baseline_diad1[:, 1])-10*np.std(Baseline_diad1[:, 1]) ax1_ymax=np.max(Baseline_diad1[:, 1])+10*np.std(Baseline_diad1[:, 1]) ax1_xmin=config1.lower_bck_diad1[0]-30 ax1_xmax=config1.upper_bck_diad1[1]+30 # Adding patches rect_diad1_b1=patches.Rectangle((config1.lower_bck_diad1[0], ax1_ymin),config1.lower_bck_diad1[1]-config1.lower_bck_diad1[0],ax1_ymax-ax1_ymin, linewidth=1,edgecolor='none',facecolor='cyan', label='sel. bck. region', alpha=0.3, zorder=0) axes['X'].add_patch(rect_diad1_b1) rect_diad1_b2=patches.Rectangle((config1.upper_bck_diad1[0], ax1_ymin),config1.upper_bck_diad1[1]-config1.upper_bck_diad1[0],ax1_ymax-ax1_ymin, linewidth=1,edgecolor='none',facecolor='cyan', alpha=0.3, zorder=0) axes['X'].add_patch(rect_diad1_b2) axes['X'].set_xlim([ax1_xmin, ax1_xmax]) axes['X'].set_ylim([ax1_ymin, ax1_ymax]) axes['X'].set_ylabel('Intensity') axes['Y'].set_ylabel('Intensity') axes['Y'].set_xlabel('Wavenumber') axes['X'].legend() axes['Y'].set_title('b) Background subtracted spectra') axes['Y'].plot(x_diad1, y_corr_diad1, '-r') height_p=np.max(Diad_short_diad1[:, 1])-np.min(Diad_short_diad1[:, 1]) axes['Y'].set_ylim([np.min(y_corr_diad1), 1.2*height_p ]) axes['X'].set_xlabel('Wavenumber') axes['A'].plot(xdat, ydat, '.k', label='bck. sub. data') axes['A'].plot( x_lin ,y_best_fit, '-g', linewidth=1, label='best fit') axes['A'].legend() axes['A'].set_ylabel('Intensity') axes['A'].set_xlabel('Wavenumber') axes['A'].set_xlim(ax1_xlim) axes['A'].set_title('c) Overall Best Fit') # individual fits axes['B'].plot(xdat, ydat, '.k') # This is for if there is more than 1 peak, this is when we want to plot the best fit if fit_peaks>1: axes['B'].plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='HB1') axes['B'].plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Diad1') if config1.fit_gauss is not False: axes['B'].plot(x_lin, components.get('bkg_'), '-m', label='Gaussian bck', linewidth=2) #ax2.plot(xdat, result.best_fit, '-g', label='best fit') axes['B'].legend() fitspan=max(y_best_fit)-min(y_best_fit) axes['B'].set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5]) axes['B'].set_ylabel('Intensity') axes['B'].set_xlabel('Wavenumber') axes['B'].set_xlim(ax2_xlim) # Dashed lines so matches part D axes['B'].plot([df_out['Diad1_Voigt_Cent'], df_out['Diad1_Voigt_Cent']], [np.min(ydat), np.max(ydat)], ':b') if fit_peaks>2: axes['B'].plot([df_out['HB1_Cent'], df_out['HB1_Cent']], [np.min(ydat), np.max(ydat)], ':r') axes['B'].set_title('c) Fit Components') # Background fit # First, set up x and y limits on axis if config1.x_range_baseline is not None: axc_xmin=df_out['Diad1_Voigt_Cent'][0]-config1.x_range_baseline axc_xmax=df_out['Diad1_Voigt_Cent'][0]+config1.x_range_baseline else: axc_xmin=config1.lower_bck_diad1[0] axc_xmax=config1.upper_bck_diad1[1] axc_ymin=np.min(Baseline_diad1[:, 1])-config1.y_range_baseline axc_ymax=np.max(Baseline_diad1[:, 1])+config1.y_range_baseline axes['C'].set_title('d) Peaks overlain on data before subtraction') axes['C'].plot(Diad_short_diad1[:, 0], Diad_short_diad1[:, 1], '.r', label='Data') axes['C'].plot(Baseline_diad1[:, 0], Baseline_diad1[:, 1], '.b', label='bck') axes['C'].plot(Diad_short_diad1[:, 0], Py_base_diad1, '-k', label='Poly bck fit') axes['C'].set_ylabel('Intensity') axes['C'].set_xlabel('Wavenumber') if config1.fit_gauss is not False: axes['C'].plot(x_lin, components.get('bkg_')+ybase_xlin, '-m', label='Gaussian bck', linewidth=2) axes['C'].plot( x_lin ,y_best_fit+ybase_xlin, '-g', linewidth=2, label='Best Fit') axes['C'].plot(x_lin, components.get('lz1_')+ybase_xlin, '-b', label='Diad1', linewidth=1) if fit_peaks>1: axes['C'].plot(x_lin, components.get('lz2_')+ybase_xlin, '-r', label='HB1', linewidth=1) axes['C'].legend(ncol=3, loc='lower center') axes['C'].set_xlim([axc_xmin, axc_xmax]) axes['C'].set_ylim([axc_ymin, axc_ymax]) #axes['C'].plot(Diad_short[:, 0], Diad_short[:, 1], '"r', label='Data') # Residual on plot D axes['D'].set_title('f) Residuals') axes['D'].plot([df_out['Diad1_Voigt_Cent'], df_out['Diad1_Voigt_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':b') if fit_peaks>2: axes['D'].plot([df_out['HB1_Cent'], df_out['HB1_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':r') axes['D'].plot(xdat_inrange, residual_diad_coords, 'ok', mfc='c' ) axes['D'].plot(xdat_inrange, residual_diad_coords, '-c' ) axes['D'].set_ylabel('Residual') axes['D'].set_xlabel('Wavenumber') # axes['D'].set_xlim(ax1_xlim) # axes['D'].set_xlim(ax2_xlim) Local_Residual_diad1=residual_diad_coords[((xdat_inrange>(df_out['Diad1_Voigt_Cent'][0]-config1.x_range_residual)) &(xdat_inrange<df_out['Diad1_Voigt_Cent'][0]+config1.x_range_residual))] axes['D'].set_xlim([df_out['Diad1_Voigt_Cent'][0]-config1.x_range_residual, df_out['Diad1_Voigt_Cent'][0]+config1.x_range_residual]) #ax5.plot([cent_1117, cent_1117 ], [np.min(Local_Residual_1117)-10, np.max(Local_Residual_1117)+10], ':k') axes['D'].set_ylim([np.min(Local_Residual_diad1)-10, np.max(Local_Residual_diad1)+10]) # Overal spectra axes['E'].set_title('g) Summary plot of raw spectra for file = ' + filename) axes['E'].plot(Spectra[:, 0], Spectra[:, 1], '-r') axes['E'].set_ylabel('Intensity') axes['E'].set_xlabel('Wavenumber') path3=path+'/'+'diad_fit_images' if os.path.exists(path3): out='path exists' else: os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False) fig.tight_layout() file=filename.rsplit('.txt', 1)[0] fig.savefig(path3+'/'+'Diad1_Fit_{}.png'.format(file), dpi=config1.dpi) if close_figure is True: plt.close(fig) # Lets calculate peak skewness here. Skew50=assess_diad1_skewness(config1=diad1_fit_config(), int_cut_off=0.5, path=path, filename=filename, filetype=filetype, skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array) Skew80=assess_diad1_skewness(config1=diad1_fit_config(), int_cut_off=0.3, path=path, filename=filename, filetype=filetype, skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array) df_out['Diad1_Asym50']=Skew50['Skewness_diad1'] df_out['Diad1_Asym70']=Skew80['Skewness_diad1'] df_out['Diad1_Yuan2017_sym_factor']=(df_out['Diad1_fwhm'])*(df_out['Diad1_Asym50']-1) df_out['Diad1_Remigi2021_BSF']=df_out['Diad1_fwhm']/df_out['Diad1_Combofit_Height'] df_out['Diad1_PDF_Model']=model df_out = df_out.rename(columns={'reduced_chi_squared': 'Diad1_reduced_chi_squared'}) if config1.return_other_params is False: return df_out else: return df_out, result, y_best_fit, x_lin
[docs] def combine_diad_outputs(*, filename=None, prefix=True, Diad1_fit=None, Diad2_fit=None, to_csv=False, to_clipboard=False, path=None): """ This function combines the dataframes from fit_diad_2_w_bck and fit_diad_1_w_bck into a single dataframe Parameters --------------- filename: str filename data is for prefix: bool if True, splits filename on ' ', if False, leaves as is Diad1_fit: pd.DataFrame pd.DataFrame of fit from fit_diad_1_w_bck function Diad2_fit: pd.DataFrame pd.DataFrame of fit from fit_diad_2_w_bck function to_clipboard: bool If True, copies merged dataframe to clipboard to_csv: bool if True, saves fit to Csv, by making a subfolder 'Diad_Fits' inside the folder 'path' """ if prefix is True: filename = filename.split(' ', 1)[1] if Diad1_fit is not None and Diad2_fit is not None: combo=pd.concat([Diad1_fit, Diad2_fit], axis=1) # Fill any columns which dont' exist so can re-order the same every time if 'HB1_Cent' not in combo.columns: combo['HB1_Cent']=np.nan combo['HB1_Area']=np.nan combo['HB1_Sigma']=np.nan if 'HB2_Cent' not in combo.columns: combo['HB2_Cent']=np.nan combo['HB2_Area']=np.nan combo['HB2_Sigma']=np.nan if 'C13_Cent' not in combo.columns: combo['C13_Cent']=np.nan combo['C13_Area']=np.nan combo['C13_Sigma']=np.nan if 'Diad1_Gauss_Cent' not in combo.columns: combo['Diad1_Gauss_Cent']=np.nan combo['Diad1_Gauss_Area']=np.nan combo['Diad1_Gauss_Sigma']=np.nan if 'Diad2_Gauss_Cent' not in combo.columns: combo['Diad2_Gauss_Cent']=np.nan combo['Diad2_Gauss_Area']=np.nan combo['Diad2_Gauss_Sigma']=np.nan combo['Splitting']=combo['Diad2_Voigt_Cent']-combo['Diad1_Voigt_Cent'] #combo['Split_err_abs']=combo['Diad1_cent_err']+combo['Diad2_cent_err'] combo['Split_σ']=(combo['Diad1_cent_err']**2+combo['Diad2_cent_err']**2)**0.5 cols_to_move = ['Splitting', 'Split_σ', 'Diad1_Combofit_Cent', 'Diad1_cent_err', 'Diad1_Combofit_Height', 'Diad1_Voigt_Cent', 'Diad1_Voigt_Area', 'Diad1_Voigt_Sigma', 'Diad1_Residual', 'Diad1_Prop_Lor', 'Diad1_fwhm', 'Diad1_refit','Diad2_Combofit_Cent', 'Diad2_cent_err', 'Diad2_Combofit_Height', 'Diad2_Voigt_Cent', 'Diad2_Voigt_Area', 'Diad2_Voigt_Sigma', 'Diad2_Voigt_Gamma', 'Diad2_Residual', 'Diad2_Prop_Lor', 'Diad2_fwhm', 'Diad2_refit', 'HB1_Cent', 'HB1_Area', 'HB1_Sigma', 'HB2_Cent', 'HB2_Area', 'HB2_Sigma', 'C13_Cent', 'C13_Area', 'C13_Sigma', 'Diad2_Gauss_Cent', 'Diad2_Gauss_Area','Diad2_Gauss_Sigma', 'Diad1_Gauss_Cent', 'Diad1_Gauss_Area','Diad1_Gauss_Sigma', 'Diad1_Asym50', 'Diad1_Asym70', 'Diad1_Yuan2017_sym_factor', 'Diad1_Remigi2021_BSF','Diad2_Asym50', 'Diad2_Asym70', 'Diad2_Yuan2017_sym_factor', 'Diad2_Remigi2021_BSF', 'Diad1_PDF_Model', 'Diad2_PDF_Model', 'Diad1_reduced_chi_squared', 'Diad2_reduced_chi_squared'] combo_f = combo[cols_to_move + [ col for col in combo.columns if col not in cols_to_move]] combo_f=combo_f.iloc[:, 0:len(cols_to_move)] file=filename.rsplit('.txt', 1)[0] combo_f.insert(0, 'filename', file) if Diad1_fit is None and Diad2_fit is None: df=pd.DataFrame(data={'filename': filename, 'Splitting': np.nan, 'Diad1_Voigt_Cent':np.nan, 'Diad1_Voigt_Area':np.nan, 'Residual_Diad1': np.nan, 'Diad2_Voigt_Cent':np.nan, 'Diad2_Voigt_Area':np.nan, 'Residual_Diad2': np.nan, 'HB1_Cent':np.nan, 'HB1_Area':np.nan, 'HB2_Cent':np.nan, 'HB2_Area':np.nan, 'C13_Cent': np.nan, 'C13_Area': np.nan, }) combo_f=pd.concat([df, Carb_fit], axis=1) if to_clipboard is True: df.to_clipboard(excel=True, header=False, index=False) combo_f=df if to_csv is True: if path is None: raise Exception('You need to specify path= for wherever you want this saved') path_fits=path+'/'+'Diad_Fits' if os.path.exists(path_fits): out='path exists' else: dir2=os.makedirs(path+'/'+ 'Diad_Fits', exist_ok=False) #filepath=Path(path+'/'+ 'Peak_Fits'+'/'+filename) combo_f.to_csv(path+'/'+'Diad_Fits'+'/'+'fits_'+filename) return combo_f
[docs] def calculate_HB_Diad_area_ratio(df): """" Calculates two area ratios of hotbands and diads""" df_c=df.copy() df_c['HB_Diad_Ratio']=(df_c['HB1_Area']+df_c['HB2_Area'])/(df_c['Diad1_Voigt_Area']+df_c['Diad2_Voigt_Area']) df_c['HB_Total_Ratio']=(df_c['HB1_Area']+df_c['HB2_Area'])/(df_c['Diad1_Voigt_Area']+df_c['Diad2_Voigt_Area']+df_c['HB1_Area']+df_c['HB2_Area']) return df_c
## Fit generic peak
[docs] @dataclass class generic_peak_config: """ Configuration class for fitting secondary peaks (e.g. SO2, Carbonate) Attributes: name (str): Default 'Generic' Name of component you are fitting that gets stamped onto the peak parameters, e.g. if 'SO2', get Peak_Area_SO2' lower_bck, upper_bck: Tuple[float, float] Background positions in wavenumbers model_name: Default 'PseudoVoigt' Type of peak fit. Choose from 'GaussianModel', 'VoigtModel', 'PseudoVoigtModel', 'Spline'. x_range_bck: float (default 10) When showing bck collected plot, shows peak center +- this amount N_poly_carb_bck: float (default 1) Degree of polynomial to fit to background. amplitude: float (default 100) Approximate amplitude of peak fit (first guess for Gaussian , voigt and Psuedovoigt needed) cent: float (default 1090) Approximate peak position of species you are looking for. E.g. 1090 for CO2, 1150 for SO2 outlier_sigma: float (default 12) Code calculates standard deviation of background, and excludes points more than outlier_sigma*this standard deviatoin from nearby background points distance, prominence, width, threshold, height: float Scipy find peak parameters to get initial fit of peak fit_peaks (int): Default 2 Number of peaks to fit. 2 = Diad + HB, 1 = Diad. """ # Name that gets stamped onto fits name: str= 'generic' # Selecting the two background positions lower_bck: Tuple[float, float]=(1060, 1065) upper_bck: Tuple[float, float]=(1120, 1130) # model_name: str='PseudoVoigtModel' x_range_bck: float=10 # Background degree of polynomial N_poly_carb_bck: float =1 # Seletcting the amplitude amplitude: float =1000 # Selecting the peak center cent: float =1090 # outlier sigma to discard background at outlier_sigma: float =12 # Parameters for Scipy find peaks distance: float=10 prominence: float=5 width: float=6 threshold: float=0.1 height:float =100 # Excluding a range for cosmic rays etc. exclude_range: Optional [Tuple[float, float]]=None # Return other parameteres e.g. intermediate outputs return_other_params: bool = False N_peaks: int= 1 int_cut_off: float=0.05
[docs] def fit_generic_peak(*, config: generic_peak_config=generic_peak_config(), path=None, filename=None, filetype=None, plot_figure=True, dpi=200): """ This function fits a generic peak with various options config: from dataclass generic_peak_config model_name: str, 'GaussianModel', 'VoigtModel', 'PseudoVoigtModel', 'Spline', 'Poly', Fits Gaussian, Voigt, or PseudoVoigt, or fits a spline or polynomail and gets area underneath name: str Name of feature you are fitting. Used to make column headings in output (e.g., if SO2, outputs are Peak_Cent_SO2, Peak_Area_SO2) int_cut_off: float fits peak out to int_cut_off* peak height. lower_bkc: Tuple[float, float] Upper and lower x coordinate for background to left of peak upper_bkc: Tuple[float, float] Upper and lower x coordinate for background to right of peak x_range_bck: float sets xlim of figure, as the peak center + x_range_bck, and peak cent - x_range_bck N_poly_carb_bck: int Degree of polynomial to fit to the background positions amplitude: float Approx amplitude of peak. cent: float Approx cent of peak outlier_sigma: float Discards points on the background outside outlier_sigma of the median value distance, prominence, threshold, height: floats parameters for scipy find_peaks exclude_range: Optional [Tuple[float, float]] Excludes region of spectra between these values dpi: float dpi to save figure at return_other_params: bool if True, returns other peak fit parameters and fits (df, xx_carb, y_carb, result0), else just returns df of peak positions, heights etc path: str Path to file filename: str filename with file extension filetype: str choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' plot_figure: bool If true, plots figure and saves it in a subfolder in the same directory as the filepath specified dpi: int dpi to save figure at Returns ----------- df of peak fits, unless return_other_params is True, then also returns x and y coordinate of fit, and fit params """ Spectra_in=get_data(path=path, filename=filename, filetype=filetype) int_cut_off=config.int_cut_off name=config.name # If exclude range, trim that here if config.exclude_range is not None: Spectra=Spectra_in[ (Spectra_in[:, 0]<config.exclude_range[0]) | (Spectra_in[:, 0]>config.exclude_range[1]) ] else: Spectra=Spectra_in lower_0baseline=config.lower_bck[0] upper_0baseline=config.lower_bck[1] lower_1baseline=config.upper_bck[0] upper_1baseline=config.upper_bck[1] # Filter out spectra outside these baselines Spectra_short=Spectra[ (Spectra[:,0]>lower_0baseline) & (Spectra[:,0]<upper_1baseline) ] Spectra_fit=Spectra[ (Spectra[:,0]>upper_0baseline) & (Spectra[:,0]<lower_1baseline) ] # To make a nice plot, give 50 wavenumber units on either side as a buffer Spectra_plot=Spectra[ (Spectra[:,0]>lower_0baseline-50) & (Spectra[:,0]<upper_1baseline+50) ] # # Find peaks using Scipy find peaks y=Spectra_fit[:, 1] x=Spectra_fit[:, 0] spec_res=np.abs(x[0]-x[1]) # # # peaks = find_peaks(y,height = config.height, threshold = config.threshold, # distance = config.distance, prominence=config.prominence, width=config.width) # # height = peaks[1]['peak_heights'] #list of the heights of the peaks # peak_pos = x[peaks[0]] #list of the peaks positions # df_sort=pd.DataFrame(data={'pos': peak_pos, # 'height': height}) # # df_peak_sort=df_sort.sort_values('height', axis=0, ascending=False) # # # Trim number of peaks based on user-defined N peaks # df_peak_sort_short=df_peak_sort[0:config.N_peaks] # Get actual baseline Baseline_with_outl=Spectra_short[ ((Spectra_short[:, 0]<upper_0baseline) &(Spectra_short[:, 0]>lower_0baseline)) | ((Spectra_short[:, 0]<upper_1baseline) &(Spectra_short[:, 0]>lower_1baseline))] # Calculates the LH baseline LH_baseline=Spectra_short[ ((Spectra_short[:, 0]<upper_0baseline) &(Spectra_short[:, 0]>lower_0baseline))] Mean_LH_baseline=np.nanmean(LH_baseline[:, 1]) Std_LH_baseline=np.nanstd(LH_baseline[:, 1]) # Calculates the RH baseline RH_baseline=Spectra_short[((Spectra_short[:, 0]<upper_1baseline) &(Spectra_short[:, 0]>lower_1baseline))] Mean_RH_baseline=np.nanmean(RH_baseline[:, 1]) Std_RH_baseline=np.nanstd(RH_baseline[:, 1]) # Removes points outside baseline LH_baseline_filt=LH_baseline[(LH_baseline[:, 1]<Mean_LH_baseline+config.outlier_sigma*Std_LH_baseline) &(LH_baseline[:, 1]>Mean_LH_baseline-config.outlier_sigma*Std_LH_baseline) ] RH_baseline_filt=RH_baseline[(RH_baseline[:, 1]<Mean_RH_baseline+config.outlier_sigma*Std_RH_baseline) &(RH_baseline[:, 1]>Mean_RH_baseline-config.outlier_sigma*Std_RH_baseline) ] Baseline=np.concatenate((LH_baseline_filt, RH_baseline_filt), axis=0) # Fits a polynomial to the baseline of degree Pf_baseline = np.poly1d(np.polyfit(Baseline[:, 0], Baseline[:, 1], config.N_poly_carb_bck)) Py_base =Pf_baseline(Spectra_short[:, 0]) Py_base_fit =Pf_baseline(Spectra_fit[:, 0]) Baseline_ysub=Pf_baseline(Baseline[:, 0]) Baseline_x=Baseline[:, 0] y_corr= Spectra_short[:, 1]-Py_base y_corr_fit=Spectra_fit[:, 1]-Py_base_fit x_fit=Spectra_fit[:, 0] x_corr=Spectra_short[:, 0] if config.model_name not in ['GaussianModel', 'VoigtModel', 'PseudoVoigtModel', 'Spline']: raise TypeError('model_name not a permitted input, Please select either GaussianModel, VoigtModel, PseudoVoigtModel, or Spline') if config.model_name in ['GaussianModel', 'VoigtModel', 'PseudoVoigtModel']: # NOw into the voigt fitting if config.model_name=='VoigtModel': model0 = VoigtModel()#+ ConstantModel() if config.model_name == 'PseudoVoigtModel': model0 = PseudoVoigtModel() if config.model_name=='GaussianModel': model0=GaussianModel() # create parameters with initial values pars0 = model0.make_params() pars0['center'].set(config.cent, min=config.cent-30, max=config.cent+30) pars0['amplitude'].set(config.amplitude, min=0) init0 = model0.eval(pars0, x=x) result0 = model0.fit(y_corr_fit, pars0, x=x) Center_p0=result0.best_values.get('center') area_p0=result0.best_values.get('amplitude') # Make a nice linspace for plotting with smooth curves. xx_carb=np.linspace(min(x), max(x), 2000) y_carb=result0.eval(x=xx_carb) height=np.max(y_carb) # Moved this form down below, think it belongs here if area_p0 is None: area_p0=np.nan if area_p0 is not None: if area_p0<0: area_p0=np.nan df=pd.DataFrame(data={'filename': filename, 'Peak_Cent_{}'.format(name): Center_p0, 'Peak_Area_{}'.format(name): area_p0, 'Peak_Height_{}'.format(name): height, 'Model_name': config.model_name}, index=[0]) else: if config.model_name == 'Spline': from scipy.interpolate import CubicSpline # fit a spline first to find intensity cut off mix_spline_sil = interp1d(x_fit, y_corr_fit, kind='cubic') x_new=np.linspace(np.min(x_fit), np.max(x_fit), 1000) Baseline_ysub_sil=mix_spline_sil(x_new) x_max = x_new[np.argmax(Baseline_ysub_sil)] cent=x_max Peak_Center=x_max max_y=np.max(y_corr_fit) # Lets trim x and y based on intensity values # Find intensity cut off y_int_cut=max_y*int_cut_off # Check peak isnt at edge of window, else wont work if Peak_Center==np.max(x_fit) or Peak_Center==np.min(x_fit): print('peak at edge of window, setting params to nans') df=pd.DataFrame(data={'filename': filename, 'Peak_Cent_{}'.format(name): np.nan, 'Peak_Area_{}'.format(name): np.nan, 'Peak_Height_{}'.format(name): np.nan, 'Model_name': config.model_name}, index=[0]) else: # Split the array into a LHS and a RHS LHS_y=Baseline_ysub_sil[x_new<Peak_Center] # Was originally <= RHS_y=Baseline_ysub_sil[x_new>Peak_Center] LHS_x=x_new[x_new<Peak_Center] # Was originally <= RHS_x=x_new[x_new>Peak_Center] # Need to flip LHS to put into the find closest function LHS_y_flip=np.flip(LHS_y) LHS_x_flip=np.flip(LHS_x) val=np.argmax(LHS_y_flip<y_int_cut) if val == 0 and not (LHS_y_flip[0] < y_int_cut): val = np.argmin(LHS_y_flip) val2=np.argmax(RHS_y<y_int_cut) if val2 == 0 and not (RHS_y[0] < y_int_cut): val2 = np.argmin(RHS_y) # Find nearest x unit to this value y_nearest_LHS=LHS_y_flip[val] x_nearest_LHS=LHS_x_flip[val] y_nearest_RHS=RHS_y[val2] x_nearest_RHS=RHS_x[val2] diff_LHS=Peak_Center-x_nearest_LHS diff_RHS=x_nearest_RHS-Peak_Center n_res=0 x_new_skewness=np.linspace(x_nearest_LHS-spec_res*n_res, x_nearest_RHS+spec_res*n_res) x_new=x_new_skewness Baseline_ysub_sil=mix_spline_sil(x_new_skewness) N_poly_sil='Spline' xspace_sil=x_new[1]-x_new[0] area_trap = trapezoid(Baseline_ysub_sil, dx=xspace_sil) area_simps = simpson(Baseline_ysub_sil, dx=xspace_sil) area_p0=area_trap df=pd.DataFrame(data={'filename': filename, 'Peak_Cent_{}'.format(name): x_max, 'Peak_Area_{}'.format(name): area_p0, 'Peak_Height_{}'.format(name): max_y, 'Model_name': config.model_name}, index=[0]) # Plotting what its doing if plot_figure is True: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5)) fig.suptitle('Secondary Phase, file= '+ str(filename), fontsize=12, x=0.5, y=1.0) # Plot the peak positions and heights ax1.set_title('Background fit') ax1.plot(Spectra_plot[:, 0], Spectra_plot[:, 1], '-r', label='Spectra') ax1.plot(RH_baseline_filt[:, 0], RH_baseline_filt[:, 1], '.b', label='bck points') ax1.plot(LH_baseline_filt[:, 0], LH_baseline_filt[:, 1], '-b', lw=3, label='_bck points') #ax2.plot(x, y_corr_fit, '-r', label='Bck-sub data', lw=0.5) ax2.plot(x, y_corr_fit, '.r', label='Bck-sub data') ax1.plot(Spectra_short[:, 0], Py_base, '-k', label='Bck Poly') if config.model_name != 'Spline' and config.model_name != 'Poly': ax2.plot(xx_carb, y_carb, '-k', label='Peak fit') else: ax2.plot(x_new, Baseline_ysub_sil, '-k') ax2.fill_between(x_new, Baseline_ysub_sil, color='yellow', label='fit area', alpha=0.5) cent=df['Peak_Cent_{}'.format(name)].iloc[0] ax2.set_xlim([cent-config.x_range_bck, cent+config.x_range_bck]) ax2.set_title('Bkg-subtracted, ' + name + ' peak fit') ax1.set_ylabel('Intensity') ax1.set_xlabel('Wavenumber (cm$^{-1}$)') ax2.set_ylabel('Bck-corrected Intensity') ax2.set_xlabel('Wavenumber (cm$^{-1}$)') ax1.legend() ax2.legend() ax1.set_ylim([ np.min(Spectra_plot[:, 1])-50, np.max(Spectra_plot[:, 1])+50 ]) file=filename.rsplit('.txt', 1)[0] if plot_figure is True: path3=path+'/'+'Secondary_fit_Images' if os.path.exists(path3): out='path exists' else: os.makedirs(path3, exist_ok=False) file=filename.rsplit('.txt', 1)[0] fig.savefig(path3+'/'+ name +'_Fit_{}.png'.format(file), dpi=dpi) if config.return_other_params is True: return df, xx_carb, y_carb, result0 else: return df
[docs] def peak_prominence(x, y, peak_position): """ This function calculates the approximate prominence of a peak, by finding the local minimum either side of the peak Parameters ---------- x: np.array x coordinates (wavenumber) y: np.array y coordinates (intensity) peak:position: int, float Peak position to calculate prominence for Returns -------- float: prominence of peak in y coordinates. """ # Find the index of the peak position peak_index = np.where(x == peak_position)[0][0] # Find the local minimums on both sides of the peak left_valley_index = peak_index right_valley_index = peak_index # Find the left valley while left_valley_index > 0 and y[left_valley_index] >= y[left_valley_index - 1]: left_valley_index -= 1 # Find the right valley while right_valley_index < len(x) - 1 and y[right_valley_index] >= y[right_valley_index + 1]: right_valley_index += 1 # Calculate prominence as the difference between peak height and higher valley peak_height = y[peak_index] left_valley_height = y[left_valley_index] right_valley_height = y[right_valley_index] prominence = peak_height - max(left_valley_height, right_valley_height) return prominence
# Plotting up secondary peaks from scipy.signal import find_peaks
[docs] def plot_secondary_peaks(*, Diad_Files, path, filetype, xlim_plot=[1040, 1200], xlim_peaks=[1060, 1100], height=100, threshold=0.1, distance=10, prominence=5, width=6, prominence_filter=False, sigma=3, sigma_window=30, find_peaks_filter=False, just_plot=False, yscale=0.2, gaussian_smooth=False, smoothing_window=5, smooth_std=3): """ This function plots spectra stacked vertically ontop of each other, in a specific region. It also has two options to identify peak positions. Parameters --------------- Diad_Files: list List of filenames to plot path: str Path where spectra are stored filetype: str Choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' xlim_plot: Tuple[float, float] Range of x coordinates to make plot over xlim_peaks: Tuple[float, float] Range of x coordinates to search for peaks in Optional parameters for Smoothing spectra prior to finding peaks gaussian_smooth:bool - If True, applies a gaussian smoothing filter using np.convolve defined by a smoothing_window and a smooth_std. If you want to actually ID peaks rather than just plot them, choose from: prominence_filter: bool if True, finds max y coordinate in the window xlim_peaks. It then calculates the prominence by comparing this peak height to the average of the median of the 3 datapoints at the left and right of the window. If the peak is more than 'prominence' units above the average of these two end point medians, it is classified as a peak. Also need to specify prominence: float find_peaks_filter: bool if True, uses scipy find peaks to find peaks. Can tweak distance, prominence, width, threshold, and height. just_plot: bool if True, just plots spectra, doesnt try to find peaks y_scale: float Figure size is 2+y_scale * number of spectra Returns ------------- df_peaks, x_data, y_data, fig df_peaks: pd.DataFrame for each file of peak position, height, prominence and file name. Filled with Nans if didnt find a peak x_data: np.array of x coordinates (Assumes the same for all files) y_data: np.array of y coordinates of each spectra fig: figure it has plotted """ if prominence_filter is True and (find_peaks_filter is True or just_plot is True): raise TypeError('Only one of sigma_filter, ind_peaks_filter and just_plot can be True') fig, (ax1) = plt.subplots(1, 1, figsize=(10,2+yscale*len(Diad_Files))) i=0 Y=0 peak_pos_saved=np.zeros(len(Diad_Files), dtype=float) peak_prom_saved=np.zeros(len(Diad_Files), dtype=float) peak_height_saved=np.zeros(len(Diad_Files), dtype=float) peak_bck=np.zeros(len(Diad_Files), dtype=float) y_star=np.zeros(len(Diad_Files), dtype=float) yplot=np.zeros(len(Diad_Files), dtype=float) Diad_df=get_data(path=path, filename=Diad_Files[0], filetype=filetype) x_data=Diad_df[:, 0] y_data=np.zeros([ len(x_data), len(Diad_Files)], float) for file in Diad_Files: Diad_df=get_data(path=path, filename=file, filetype=filetype) Diad=np.array(Diad_df) # First lets use find peaks y2=Diad[:, 1] x=Diad[:, 0] if gaussian_smooth is True: y = np.convolve(y2, gaussian(smoothing_window, std=smooth_std), mode='same') else: y=y2 # Region of interest Region_peaks = ((x>xlim_peaks[0])& (x<xlim_peaks[1])) Region_plot=((x>xlim_plot[0])& (x<xlim_plot[1])) x_trim=x[Region_peaks] y_trim=y[Region_peaks] x_plot=x[Region_plot] y_plot=y[Region_plot] y_data[:, i]=y # IF using scipy find peaks if find_peaks_filter is True: # Find peaks for trimmed spectra (e.g. in region peaks have been asked for) peaks = find_peaks(y_trim,height = height, threshold = threshold, distance = distance, prominence=prominence, width=width) height_PEAK = peaks[1]['peak_heights'] #list of the heights of the peaks peak_pos = x_trim[peaks[0]] #list of the peaks positions df_sort=pd.DataFrame(data={'pos': peak_pos, 'height': height_PEAK}) df_peak_sort=df_sort.sort_values('height', axis=0, ascending=False) # Trim number of peaks based on user-defined N peaks #print(df_peak_sort_short) if len(df_peak_sort>=1): df_peak_sort_short=df_peak_sort[0:1] peak_pos_saved[i]=df_peak_sort_short['pos'].values[0] peak_height_saved[i]=df_peak_sort_short['height'].values[0] else: peak_pos_saved[i]=np.nan peak_height_saved[i]=np.nan peak_bck[i]=np.quantile(y_plot, 0.2) av_y=np.quantile(y_plot, 0.5) Diff=np.max(y_plot)-np.min(y_plot) Y_sum=np.max(y_plot)/Diff ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '-r') ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '.k', ms=1) if len(height_PEAK)>0: ax1.plot(df_peak_sort_short['pos'], (df_peak_sort_short['height']-np.min(y_plot))/Diff+i, '*k', mfc='yellow', ms=10) yplot[i]=np.min(((y_plot-np.min(y_plot))/Diff)+i) ax1.annotate(str(file), xy=(xlim_plot[1]+0.5, yplot[i]), xycoords="data", fontsize=8) #ax1.set_xlim(xlim) Y=Y+Y_sum # calculate prominence y_edge1=np.median(y_trim[0:3]) y_edge2=np.median(y_trim[-3:]) peak_prom_saved[i]=peak_height_saved[i]-(y_edge1+y_edge2)/2 i=i+1 elif prominence_filter is True: # Find max value in region maxy=np.max(y_trim) xpos=x_trim[y_trim==maxy][0] #print(xpos) # if (xpos+sigma_window)>np.max(x_trim): # Raise TypeError('Your peak finding window is smaller than sigma window') # y_around_max=y_trim[(x_trim>(xpos+sigma_window))&(x_trim<(xpos-sigma_window))] # stdy=np.nanstd(y_around_max) # mediany=np.quantile(y_around_max, 0.5) # if maxy>mediany+stdy*sigma+prominence: # pos_x=x_trim[y_trim==maxy][0] # else: # pos_x=np.nan #prominence_calc = peak_prominence(x_trim, y_trim, xpos) y_edge1=np.median(y_trim[0:3]) y_edge2=np.median(y_trim[-3:]) #print(y_edge2) prominence_calc=maxy-(y_edge1+y_edge2)/2 peak_prom_saved[i]=prominence_calc if prominence_calc>prominence: pos_x=x_trim[y_trim==maxy][0] else: pos_x=np.nan if pos_x>0: peak_pos_saved[i]=pos_x peak_height_saved[i]=maxy else: peak_pos_saved[i]=np.nan peak_height_saved[i]=np.nan peak_bck[i]=np.quantile(y_plot, 0.2) av_y=np.quantile(y_plot, 0.5) Diff=np.max(y_plot)-np.min(y_plot) Y_sum=np.max(y_plot)/Diff # IF there is something that passes the filter if pos_x>0: y_star[i]=(peak_height_saved[i]-np.min(y_plot))/Diff+i ax1.plot(pos_x, y_star[i], '*k', mfc='yellow', ms=10) else: y_star[i]=np.nan ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '-r') ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '.k', ms=1) yplot[i]=np.min(((y_plot-np.min(y_plot))/Diff)+i) ax1.annotate(str(file), xy=(xlim_plot[1]+0.5, yplot[i]), xycoords="data", fontsize=8) Y=Y+Y_sum i=i+1 elif just_plot is True: Diff=np.max(y_plot)-np.min(y_plot) Y_sum=np.max(y_plot)/Diff ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '-r') ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '.k', ms=1) yplot[i]=np.min(((y_plot-np.min(y_plot))/Diff)+i) ax1.annotate(str(file), xy=(xlim_plot[1]+0.5, yplot[i]), xycoords="data", fontsize=8) Y=Y+Y_sum i=i+1 df_peaks=pd.DataFrame(data={'pos': peak_pos_saved, 'height': peak_height_saved, 'prom': peak_prom_saved}) #if sigma_filter is True: #ax1.plot(df_peaks['pos'][y_star>0], y_star[y_star>0], '*k', mfc='yellow', ms=10) ax1.plot([1078, 1078], [0, yplot[-1]+2], '-g', lw=1, label='Na$_2$CO$_3$') ax1.plot([1085, 1085], [0, yplot[-1]+2], '-', color='cornflowerblue', lw=1, label='CaCO$_3$') ax1.plot([1094, 1094], [0, yplot[-1]+2], '-c', lw=1, label='MgCO$_3$') ax1.plot([1097, 1097], [0, yplot[-1]+2], '-b', lw=1, label='Dolomite') ax1.plot([1131, 1131], [0, yplot[-1]+2], '-', color='grey', lw=1, label='CaSO$_4$') ax1.plot([1136, 1136], [0, yplot[-1]+2], '--', color='rosybrown', lw=1, label='MgSO$_4$') ax1.plot([1151, 1151], [0, yplot[-1]+2], '-k', lw=1, label='SO$_2$') ax1.fill_between(xlim_peaks, -0.5, yplot[-1]+3, color='blue', alpha=0.1) ax1.legend(ncol=7,loc='upper center', fontsize=10, bbox_to_anchor=[0.5, 1.05]) ax1.set_ylim([-0.5, yplot[-1]+3]) ax1.set_xlim([xlim_plot[0], xlim_plot[1]+0.5]) # df_peaks=pd.DataFrame(data={'filename': Diad_Files, # 'pos': peak_pos_saved, # 'prom': peak_height_saved-peak_bck}) ax1.set_xlabel('Wavenumber (cm$^{-1}$)') ax1.set_yticks([]) x_data=x_trim df_peaks['file_names']=Diad_Files return df_peaks, x_data, y_data, fig
## Diad Skewness from DeVitre et al. import pandas as pd import numpy as np import matplotlib.pyplot as plt from scipy.signal import find_peaks # New things needed here from scipy.interpolate import interp1d from scipy.signal import find_peaks import os from os import listdir from os.path import isfile, join from DiadFit.importing_data_files import * from DiadFit.diads import * np.seterr (invalid='raise') encode="ISO-8859-1"
[docs] def assess_diad1_skewness(*, config1: diad1_fit_config=diad1_fit_config(), int_cut_off=0.3, path=None, filename=None, filetype=None, skewness='abs', height=1, prominence=5, width=0.5, plot_figure=True, peak_fit_routine=False, peak_pos=None, peak_height=None, dpi=200, diad_array=None): """ Assesses Skewness of Diad peaks. Useful for identifying mixed L + V phases (see DeVitre et al. in review) Parameters ----------- config1: diad1_fit_config() dataclass. Import parameters for this function are. lower_bck_diad1, upper_bck_diad1: [float, float] background position to left and right of overal diad region (inc diad+-HB+-C13) N_poly_bck_diad1: int degree of polynomial to fit to the background. exclude_range1, exclude_range2: Tuple[float, float] Range to exclude int_cut_off: float Value of intensity at which to calculate the skewness (e.g. 0.15X the peak height). path: str Folder user wishes to read data from filename: str Specific file being read filetype: str Identifies type of file choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' skewness: 'abs' or 'dir' If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction) if 'dir' does skewness as one side over the other height, prominence, width: float Scipy find peak parameters plot_figure: bool if True, plots and saves a figure. dpi: int dpi to save image at peak_fit_routine: bool (default False) - if True, doesnt find max of spline, but uses the peak position you have found from the diad fitting functions. Also needs peak_pos (peak center in cm-1) and peak_height Returns ----------- pd.DataFrame with information about file and skewness parameters. """ y_corr_diad1, Py_base_diad1, x_diad1, Diad_short, Py_base_diad1, Pf_baseline, Baseline_ysub_diad1, Baseline_x_diad1, Baseline, span=remove_diad_baseline( path=path, filename=filename, filetype=filetype, N_poly=config1.N_poly_bck_diad1, lower_bck=config1.lower_bck_diad1, upper_bck=config1.upper_bck_diad1, plot_figure=False, diad_array=diad_array) x_lin_baseline=np.linspace(config1.lower_bck_diad1[0], config1.upper_bck_diad1[1], 100000) ybase_xlin=Pf_baseline(x_lin_baseline) # Get x and y ready to make a cubic spline through data x=x_diad1 y=y_corr_diad1 # Fit a cubic spline f2 = interp1d(x, y, kind='cubic') x_new=np.linspace(np.min(x), np.max(x), 100000) y_cub=f2(x_new) # Use Scipy find peaks to get the biggest peak if peak_fit_routine is False: height=height peaks = find_peaks(y_cub, height=height, prominence=prominence, width=width) peak_height=peaks[1]['peak_heights'] peak_pos = x_new[peaks[0]] # find max peak. put into df because i'm lazy peak_df=pd.DataFrame(data={'pos': peak_pos, 'height': peak_height}) # Find bigest peaks, df_peak_sort=peak_df.sort_values('height', axis=0, ascending=False) df_peak_sort_trim=df_peak_sort[0:1] Peak_Center=df_peak_sort_trim['pos'] Peak_Height=df_peak_sort_trim['height'] # now we are using peak parameters we got from Voigt/Pseudovoigt etc. if peak_fit_routine is True: Peak_Height=peak_height Peak_Center=peak_pos # Find intensity cut off y_int_cut=Peak_Height.iloc[0]*int_cut_off # Split the array into a LHS and a RHS LHS_y=y_cub[x_new<=Peak_Center.values] RHS_y=y_cub[x_new>Peak_Center.values] LHS_x=x_new[x_new<=Peak_Center.values] RHS_x=x_new[x_new>Peak_Center.values] # Need to flip LHS to put into the find closest function LHS_y_flip=np.flip(LHS_y) LHS_x_flip=np.flip(LHS_x) val=np.argmax(LHS_y_flip<y_int_cut) val2=np.argmax(RHS_y<y_int_cut) # Find nearest x unit to this value y_nearest_LHS=LHS_y_flip[val] x_nearest_LHS=LHS_x_flip[val] y_nearest_RHS=RHS_y[val2] x_nearest_RHS=RHS_x[val2] # Return Skewness LHS_Center=abs(x_nearest_LHS-Peak_Center) RHS_Center=abs(x_nearest_RHS-Peak_Center) #Also get option to always take biggest value of skewness if skewness=='abs': if LHS_Center.values>RHS_Center.values: AS=LHS_Center.values/RHS_Center.values else: AS=RHS_Center.values/LHS_Center.values elif skewness=='dir': AS=RHS_Center.values/LHS_Center.values if plot_figure is True: # Make pretty figure showing background subtractoin routine fig, ((ax2, ax1), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,10)) fig.suptitle('Diad 1 Skewness file= '+ str(filename), fontsize=16, x=0.5, y=1.0) ax2.set_title('a) Spectra') ax2.plot(Diad_short[:, 0], Diad_short[:, 1], '-r', label='Data') ax2.set_xlim([config1.lower_bck_diad1[0]+40, config1.upper_bck_diad1[1]-40]) ax1.set_title('b) Background fit') ax1.plot(Diad_short[:, 0], Diad_short[:, 1], '.-c', label='Data') ax1.plot(Baseline[:, 0], Baseline[:, 1], '.b', label='bck') ax1.plot(Diad_short[:, 0], Py_base_diad1, '-k', label='bck fit') ax3.set_title('c) Background subtracted') ax3.plot(x_diad1,y_corr_diad1, '-r', label='Bck sub') # Adds cubic interpoloation for inspection ax4.set_title('d) Cubic Spline') ax4.plot([x_nearest_LHS, Peak_Center.iloc[0]], [y_int_cut, y_int_cut], '-g') ax4.annotate(str(np.round(LHS_Center.values[0], 2)), xy=(x_nearest_LHS-3, y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data", fontsize=12, color='green') ax4.plot(x_nearest_LHS, y_nearest_LHS, '*k', mfc='green',ms=15, label='RH tie') ax4.plot([Peak_Center.iloc[0], x_nearest_RHS], [y_int_cut, y_int_cut], '-', color='grey') ax4.annotate(str(np.round(RHS_Center.values[0], 2)), xy=(x_nearest_RHS+3, y_int_cut+(Peak_Height.iloc[0]-y_int_cut)/10), xycoords="data", fontsize=12, color='grey') ax4.plot(x_nearest_RHS, y_nearest_RHS, '*k', mfc='grey', ms=15, label='LH tie') ax4.plot(x, y, '.r') ax4.plot(x_new, y_cub, '-k') ax4.plot([Peak_Center.iloc[0], Peak_Center.iloc[0]], [Peak_Height.iloc[0], Peak_Height.iloc[0]], '*k', mfc='blue', ms=15, label='Scipy Center') # Add to plot ax4.plot(config1.lower_bck_diad1[0], config1.upper_bck_diad1[1], [y_int_cut, y_int_cut], ':r') ax4.set_xlim([x_nearest_LHS-10, x_nearest_RHS+10]) ax4.set_ylim([0-10, Peak_Height.iloc[0]*1.2]) ax4.plot([Peak_Center.iloc[0], Peak_Center.iloc[0]], [0, Peak_Height.iloc[0]], ':b') ax4.annotate('Skewness='+str(np.round(AS[0], 2)), xy=(x_nearest_RHS+2, y_int_cut+y_int_cut+(Peak_Height.iloc[0]-y_int_cut)/10), xycoords="data", fontsize=12) ax2.legend() ax1.legend() ax3.legend() ax4.legend() ax1.set_xlabel('Wavenumber (cm$^{-1}$)') ax2.set_xlabel('Wavenumber (cm$^{-1}$)') ax3.set_xlabel('Wavenumber (cm$^{-1}$)') ax4.set_xlabel('Wavenumber (cm$^{-1}$)') ax1.set_ylabel('Intensity') ax2.set_ylabel('Intensity') ax3.set_ylabel('Intensity') ax4.set_ylabel('Intensity') fig.tight_layout() path3=path+'/'+'Skewness_images' if os.path.exists(path3): out='path exists' else: os.makedirs(path+'/'+ 'Skewness_images', exist_ok=False) file=filename.rsplit('.txt', 1)[0] fig.savefig(path3+'/'+'Diad1_skewness_{}.png'.format(file), dpi=dpi) df_out=pd.DataFrame(data={'filename':filename, 'Skewness_diad1': AS, 'LHS_tie_diad1': x_nearest_LHS, 'RHS_tie_diad1': x_nearest_RHS}) return df_out
[docs] def assess_diad2_skewness(*, config1: diad2_fit_config=diad1_fit_config(), int_cut_off=0.3, path=None, filename=None, filetype=None, skewness='abs', height=1, prominence=5, width=0.5, plot_figure=True, dpi=200, peak_fit_routine=False, peak_pos=None, peak_height=None, diad_array=None): """ Assesses Skewness of Diad peaks. Useful for identifying mixed L + V phases (see DeVitre et al. in review) Parameters ----------- config1: diad2_fit_config() dataclass. Import parameters for this function are. lower_bck_diad2, upper_bck_diad2: [float, float] background position to left and right of overal diad region (inc diad+-HB+-C13) N_poly_bck_diad1: int degree of polynomial to fit to the background. exclude_range1, exclude_range2: Tuple[float, float] Range to exclude int_cut_off: float Value of intensity at which to calculate the skewness (e.g. 0.15X the peak height). path: str Folder user wishes to read data from filename: str Specific file being read filetype: str Identifies type of file choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' skewness: 'abs' or 'dir' If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction) if 'dir' does skewness as one side over the other height, prominence, width: float Scipy find peak parameters plot_figure: bool if True, plots and saves a figure. dpi: int dpi to save image at peak_fit_routine: bool (default False) - if True, doesnt find max of spline, but uses the peak position you have found from the diad fitting functions. Also needs peak_pos (peak center in cm-1) and peak_height Returns ----------- pd.DataFrame with information about file and skewness parameters. """ # First, do the background subtraction y_corr_diad2, Py_base_diad2, x_diad2, Diad_short, Py_base_diad2, Pf_baseline, Baseline_ysub_diad2, Baseline_x_diad2, Baseline, span=remove_diad_baseline( path=path, filename=filename, filetype=filetype, N_poly=config1.N_poly_bck_diad2, lower_bck=config1.lower_bck_diad2, upper_bck=config1.upper_bck_diad2, plot_figure=False, diad_array=diad_array) x_lin_baseline=np.linspace(config1.lower_bck_diad2[0], config1.upper_bck_diad2[1], 100000) ybase_xlin=Pf_baseline(x_lin_baseline) # Get x and y for cubic spline x=x_diad2 y=y_corr_diad2 # Fits a cubic spline f2 = interp1d(x, y, kind='cubic') x_new=np.linspace(np.min(x), np.max(x), 100000) y_cub=f2(x_new) if peak_fit_routine is False: # Use Scipy find peaks to get that cubic peak peaks = find_peaks(y_cub, height=height, prominence=prominence, width=width) peak_height=peaks[1]['peak_heights'] peak_pos = x_new[peaks[0]] # find max peak. put into df because i'm lazy peak_df=pd.DataFrame(data={'pos': peak_pos, 'height': peak_height}) df_peak_sort=peak_df.sort_values('height', axis=0, ascending=False) df_peak_sort_trim=df_peak_sort[0:1] Peak_Center=df_peak_sort_trim['pos'].iloc[0] Peak_Height=df_peak_sort_trim['height'].iloc[0] if peak_fit_routine is True: Peak_Height=peak_height[0] Peak_Center=peak_pos[0] # Find intensity cut off y_int_cut=Peak_Height*int_cut_off # Split the array into a LHS and a RHS LHS_y=y_cub[x_new<=Peak_Center] RHS_y=y_cub[x_new>Peak_Center] LHS_x=x_new[x_new<=Peak_Center] RHS_x=x_new[x_new>Peak_Center] # Need to flip LHS to put into the find closest function LHS_y_flip=np.flip(LHS_y) LHS_x_flip=np.flip(LHS_x) val=np.argmax(LHS_y_flip<y_int_cut) val2=np.argmax(RHS_y<y_int_cut) # Find nearest x unit to this value y_nearest_LHS=LHS_y_flip[val] x_nearest_LHS=LHS_x_flip[val] y_nearest_RHS=RHS_y[val2] x_nearest_RHS=RHS_x[val2] # Return Skewness LHS_Center=abs(x_nearest_LHS-Peak_Center) RHS_Center=abs(x_nearest_RHS-Peak_Center) #Added conditional to always have a ratio of the high/low if skewness=='abs': if LHS_Center>RHS_Center: AS=LHS_Center/RHS_Center else: AS=RHS_Center/LHS_Center elif skewness=='dir': AS=RHS_Center/LHS_Center if plot_figure is True: # Make pretty figure showing background subtractoin routine fig, ((ax2, ax1), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,10)) fig.suptitle('Diad 2 Skewness file= '+ str(filename), fontsize=16, x=0.5, y=1.0) ax2.set_title('a) Spectra') ax2.plot(Diad_short[:, 0], Diad_short[:, 1], '-r', label='Data') ax2.set_xlim([config1.lower_bck_diad2[0]+40, config1.upper_bck_diad2[1]-40]) ax1.set_title('b) Background fit') ax1.plot(Diad_short[:, 0], Diad_short[:, 1], '.-r', label='Data') ax1.plot(Baseline[:, 0], Baseline[:, 1], '.b', label='bck') ax1.plot(Diad_short[:, 0], Py_base_diad2, '-k', label='bck fit') ax3.set_title('c) Background subtracted') ax3.plot( x_diad2,y_corr_diad2, '-r', label='Bck sub') # Adds cubic interpoloation for inspection ax4.set_title('d) Cubic Spline') ax4.plot([x_nearest_LHS, Peak_Center], [y_int_cut, y_int_cut], '-g') ax4.annotate(str(np.round(LHS_Center, 2)), xy=(x_nearest_LHS-3, y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data", fontsize=12, color='green') ax4.plot(x_nearest_LHS, y_nearest_LHS, '*k', mfc='green',ms=15, label='RH tie') ax4.plot([Peak_Center, x_nearest_RHS], [y_int_cut, y_int_cut], '-', color='grey') ax4.annotate(str(np.round(RHS_Center, 2)), xy=(x_nearest_RHS+3, y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data", fontsize=12, color='grey') ax4.plot(x_nearest_RHS, y_nearest_RHS, '*k', mfc='grey', ms=15, label='LH tie') ax4.plot(x, y, '.r') ax4.plot(x_new, y_cub, '-k') ax4.plot([Peak_Center, Peak_Center], [Peak_Height, Peak_Height], '*k', mfc='blue', ms=15, label='Scipy Center') # Add to plot ax4.plot(config1.lower_bck_diad2[0], config1.upper_bck_diad2[1], [y_int_cut, y_int_cut], ':r') ax4.set_xlim([x_nearest_LHS-10, x_nearest_RHS+10]) ax4.set_ylim([0-10, Peak_Height*1.2]) ax4.plot([Peak_Center, Peak_Center], [0, Peak_Height], ':b') ax4.annotate('Skewness='+str(np.round(AS, 2)), xy=(x_nearest_RHS+2, y_int_cut+y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data", fontsize=10) ax2.legend() ax1.legend() ax3.legend() ax4.legend() ax1.set_xlabel('Wavenumber (cm$^{-1}$)') ax2.set_xlabel('Wavenumber (cm$^{-1}$)') ax3.set_xlabel('Wavenumber (cm$^{-1}$)') ax4.set_xlabel('Wavenumber (cm$^{-1}$)') ax1.set_ylabel('Intensity') ax2.set_ylabel('Intensity') ax3.set_ylabel('Intensity') ax4.set_ylabel('Intensity') fig.tight_layout() path3=path+'/'+'Skewness_images' if os.path.exists(path3): out='path exists' else: os.makedirs(path+'/'+ 'Skewness_images', exist_ok=False) file=filename.rsplit('.txt', 1)[0] fig.savefig(path3+'/'+'Diad2_skewness_{}.png'.format(file), dpi=dpi) df_out=pd.DataFrame(data={ 'Skewness_diad2': AS, 'LHS_tie_diad2': x_nearest_LHS, 'RHS_tie_diad2': x_nearest_RHS}, index=[0]) return df_out
[docs] def loop_diad_skewness(*, Diad_files, path=None, filetype=None, skewness='abs', sort=False, int_cut_off=0.15, config_diad1: diad1_fit_config=diad1_fit_config(), config_diad2: diad2_fit_config=diad2_fit_config(), prominence=10, width=1, height=1): """ Loops over all supplied files to calculate skewness for multiple spectra using the functions assess_diad1_skewness() and assess_diad2_skewness() Parameters ----------- Diad_Files: list List of filenames to loop through path: str Folder user wishes to read data from filetype: str Identifies type of file choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII', 'HORIBA_txt', 'Renishaw_txt' skewness: 'abs' or 'dir' If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction) if 'dir' does skewness as one side over the other sort: bool If true, sorts files alphabetically file_ext: str File format. Default txt, could also enter csv etc. exclude_str: str Excludes files with this string in their name. E.g. if exclude_str='Ne' it will exclude Ne lines exclude_type: str Excludes files of this type, e.g. exclude_type='png' gets rid of image files. lower_baseline_diad1, lower_baseline_diad2: list Region of spectra to use a baseline (LHS) for diad 1 and diad2 upper_baseline_diad1, upper_baseline_diad2: list Region of spectra to use a baseline (RHS) for diad 1 and diad 2 N_poly_bck_diad1, N_poly_bck_diad2: int Degree of polynomial for background fitting skewness: 'abs' or 'dir' If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction) if 'dir' does skewness as one side over the other int_cut_off: float Value of intensity at which to calculate the skewness (e.g. 0.15X the peak height). Returns ----------- pd.DataFrame wtih filename, skewness of Diad 1, the x position of the LH and RH tie point at intensity=int_int_cut_off """ df_diad1 = pd.DataFrame([]) df_diad2 = pd.DataFrame([]) df_merged=pd.DataFrame([]) for i in range(0, len(Diad_files)): filename=Diad_files[i] print('working on file #'+str(i)) data_diad1=assess_diad1_skewness(config1=config_diad1, int_cut_off=int_cut_off, prominence=prominence, height=height, width=width, skewness=skewness, path=path, filename=filename, filetype=filetype) data_diad2=assess_diad2_skewness(config1=config_diad2, int_cut_off=int_cut_off, prominence=prominence, height=height, width=width, skewness=skewness, path=path, filename=filename, filetype=filetype) df_diad1 = pd.concat([df_diad1, data_diad1], axis=0) df_diad2 = pd.concat([df_diad2, data_diad2], axis=0) df_combo=pd.concat([df_diad1, df_diad2], axis=1) cols_to_move=['filename', 'Skewness_diad1', 'Skewness_diad2'] df_combo = df_combo[cols_to_move + [ col for col in df_combo.columns if col not in cols_to_move]] return df_combo
## Give nice column names