Source code for DiadFit.diads

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import patches
import lmfit
from lmfit.models import GaussianModel, VoigtModel, LinearModel, ConstantModel, PseudoVoigtModel, Pearson4Model
from scipy.signal import find_peaks
from scipy.signal.windows import gaussian


import os
import re
from os import listdir
from os.path import isfile, join
from DiadFit.importing_data_files import *
from typing import Tuple, Optional
from dataclasses import dataclass
import matplotlib.patches as patches
import warnings as w
from tqdm import tqdm
from scipy.integrate import trapezoid
from scipy.integrate import simpson
from scipy.interpolate import interp1d


# Allowed models

allowed_models = ["VoigtModel", "PseudoVoigtModel", "Pearson4Model", "SkewedVoigtModel"]

# For debuggin

#import warnings
#warnings.simplefilter('error')

encode="ISO-8859-1"


[docs]
def plot_diad(*,path=None, filename=None, filetype='Witec_ASCII', Spectra_x=None, Spectra_y=None):
    """This function makes a plot of the spectra for a specific file to allow visual inspectoin

    Parameters
    ----------------
    Either

    path: str
        Folder where spectra is stored
    filename: str
        Filename of specific spectra file of interest
    filetype: str
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'
    Or
        Spectra_x: np.array of x coordinates
        Spectra_y: np.array of y coordinates

    Returns
        plots figure with common peaks overlain


    """
    if 'CRR' in filename:
        filetype='headless_txt'

    if Spectra_x is None:
        Spectra_df=get_data(path=path, filename=filename, filetype=filetype)

        Spectra=np.array(Spectra_df)

        Spectra_x=Spectra[:, 0]
        Spectra_y=Spectra[:, 1]




    fig, (ax1) = plt.subplots(1, 1, figsize=(4,3))

    miny=np.min(Spectra_y)
    maxy=np.max(Spectra_y)
    ax1.plot([1090, 1090], [miny, maxy], ':k', label='Magnesite')
    ax1.plot([1131, 1131], [miny, maxy], '-',  alpha=0.5,color='grey', label='Anhydrite/Mg-Sulfate')
    #ax1.plot([1136, 1136], [miny, maxy], '-', color='grey', label='Mg-Sulfate')
    ax1.plot([1151, 1151], [miny, maxy], ':c', label='SO2')
    ax1.plot([1286, 1286], [miny, maxy], '-g',  alpha=0.5,label='Diad1')
    ax1.plot([1389, 1389], [miny, maxy], '-m', alpha=0.5, label='Diad2')
    ax1.legend()
    ax1.plot(Spectra_x, Spectra_y, '-r')
    ax1.set_xlabel('Wavenumber (cm-1)')
    ax1.set_ylabel('Intensity')







[docs]
@dataclass
class diad_id_config:

    # Thresholds for Scipy find peaks
    height: float = 1
    width: float=0.5
    prominence: float=50

    # to plot or not to plot
    plot_figure: bool = True


    # Exclude a range, e.g. cosmic rays
    exclude_range1: Optional [Tuple[float, float]] = None
    exclude_range2: Optional [Tuple[float, float]] = None
    # Approximate diad position
    approx_diad2_pos: Tuple[float, float]=(1379, 1395)
    approx_diad1_pos: Tuple[float, float]=(1275, 1290)


    # Background positions for prominence
    LH_bck_diad1=[1180, 1220]
    LH_bck_diad2=[1330, 1350]
    RH_bck_diad1=[1330, 1350]
    RH_bck_diad2=[1450, 1470]

    Diad_window_width=30

    Diad2_window: Tuple[float, float]=(approx_diad2_pos[0]-Diad_window_width, approx_diad2_pos[1]+Diad_window_width)
    Diad1_window: Tuple[float, float]=(approx_diad1_pos[0]-Diad_window_width, approx_diad1_pos[1])

    approx_diad2_pos_3peaks: Tuple[float, float]=(1379, 1395, 1379-17)








[docs]
def identify_diad_peaks(*, config: diad_id_config=diad_id_config(), path=None, filename=None, diad_array=None,
 filetype='Witec_ASCII',  plot_figure=True):

    """ This function fits a spline to the spectral data. It then uses Scipy find peaks to identify the diad,
    HB and C13 peaks. It outputs parameters such as peak positions, peak prominences,
    and peak height ratios that can be very useful when splitting data into groups.
    It uses information stored in diad_id_config for most parameters

    Parameters
    -------------
    config: dataclass
        This is diad_id_config, which you should edit before you feed into the function to adjust peak finding
        parameters. E.g. to change prominence of peaks found by scipy, do config=pf.diad_id_config(prominence=12),
        then input into this function

        Things that can be adjusted in this config file
            height: 1 (threshold height for Scipy find peaks)
            width: 0.5 (threshold width for Scipy find peaks)
            prominence: 50 (threshold prominence for scipy find peaks)
            exclude_range1: None (default ) or list - excludes a certain region of the spectra (e.g, [1200, 1250])
            exclude_range2: None (default ) or list - excludes a certain region of the spectra (e.g, [1300, 1310])
            Diad_window_width: 30 - Window to the left of Diad1 and to the right of Diad2 to look fo rdiad + hotbands.
            approx_diad2_pos: (1379, 1395) - Region for code to look for diad2 (+-Diad_window_width).
            approx_diad1_pos: (1275, 1290) - Region for code to look for diad1 (+-Diad_window_width).
            LH_bck_diad1, LH_bck_diad2, RH_bck_diad1, RH_bck_diad2: Positions either side of peaks to calculate prominences (e.g. LH_bck_diad1=[1180, 1220])


    path: str
        Path where spectra is stored

    filename: str
        Specific name of file being plotted

    filetype: str
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    plot_figure: bool
        if True, plots a figure of peaks, and the identified peaks that Scipy found

    Returns
    -------------
    pd.DataFrame
        Dataframe of approximate peak positions, prominences, etc.

    np.array of x and y coordinates of spectra

    figure if plot_figure is True


    """
    if diad_array is None:
        Diad_df=get_data(path=path, filename=filename, filetype=filetype)
        Diad=np.array(Diad_df)
    else:
        Diad=diad_array



    # First lets use find peaks
    y_in=Diad[:, 1]
    x_in=Diad[:, 0]
    spec_res=np.abs(x_in[1]-x_in[0])

    # Now lets fit a cubic spline to smoooth it all out a bit
    # Get x and y ready to make a cubic spline through data
    # Fit a  cubic spline
    f2 = interp1d(x_in, y_in, kind='cubic')
    x_new=np.linspace(np.min(x_in), np.max(x_in), 100000)

    y_cub=f2(x_new)

    y=y_cub
    x=x_new



    # Spacing of hotband from main peak
    diad2_HB2_min_offset=19-spec_res
    diad2_HB2_max_offset=23+spec_res

    # Spacing of HB from main peak.
    diad1_HB1_min_offset=19-spec_res
    diad1_HB1_max_offset=23+spec_res

    # Spacing of c13 from main peak
    diad2_C13_min_offset=16.5-spec_res
    diad2_C13_max_offset=20+spec_res

    peaks = find_peaks(y, height = config.height, prominence=config.prominence, width=config.width)


    # This gets the list of peak positions.
    peak_pos = x[peaks[0]]
    height = peaks[1]['peak_heights']
    df=pd.DataFrame(data={'pos': peak_pos,
                        'height': height})

    # Here, we are looking for peaks in the diad window.

    df_pks_diad1=df[(df['pos']>config.Diad1_window[0]) & (df['pos']<config.Diad1_window[1]) ]

    # Find peaks within the 2nd diad window
    df_pks_diad2=df[(df['pos']>config.Diad2_window[0]) & (df['pos']<config.Diad2_window[1]) ]


    # Find N highest peaks within range you have selected.
    df_sort_diad1=df_pks_diad1.sort_values('height', axis=0, ascending=False)
    if len(df_sort_diad1)>2:
        df_sort_diad1_trim=df_sort_diad1 #[0:2]
    else:
        df_sort_diad1_trim=df_sort_diad1

    df_sort_diad2=df_pks_diad2.sort_values('height', axis=0, ascending=False)
    if len(df_sort_diad2)>3:
        df_sort_diad2_trim=df_sort_diad2  #[0:3]

    else:
        df_sort_diad2_trim=df_sort_diad2

    # Check if any peaks lie where we expect the second diad
    right_pos_diad2=df_sort_diad2_trim['pos'].between(config.approx_diad2_pos[0], config.approx_diad2_pos[1])
    if any(right_pos_diad2):

        manual_diad2=False
        df_sort_diad2_rightpos=df_sort_diad2_trim.loc[right_pos_diad2]
        diad_2_diad=df_sort_diad2_rightpos.loc[df_sort_diad2_rightpos['height']==np.max(df_sort_diad2_rightpos['height'])]
        df_out=pd.DataFrame(data={'filename': filename,
                                'Diad2_pos': diad_2_diad['pos'],
                                'Diad2_height': diad_2_diad['height']})




        # Allocate hot band 2 position

        if any(df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]+diad2_HB2_min_offset, diad_2_diad['pos'].iloc[0]+diad2_HB2_max_offset)):

            diad_2_HB=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]+diad2_HB2_min_offset, diad_2_diad['pos'].iloc[0]+diad2_HB2_max_offset)]

            df_out['HB2_pos']=diad_2_HB['pos'].iloc[0]
            df_out['HB2_height']=diad_2_HB['height'].iloc[0]


        else:

            df_out['HB2_pos']=np.nan
            df_out['HB2_height']=np.nan


        # Look for C13 position
        if any(df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]-diad2_C13_max_offset, diad_2_diad['pos'].iloc[0]-diad2_C13_min_offset)):
            diad_2_C13=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(diad_2_diad['pos'].iloc[0]-diad2_C13_max_offset, diad_2_diad['pos'].iloc[0]-diad2_C13_min_offset)]
            df_out['C13_pos']=diad_2_C13['pos'].iloc[0]
            df_out['C13_height']=diad_2_C13['height'].iloc[0]
        else:
            df_out['C13_pos']=np.nan
            df_out['C13_height']=np.nan
    # If it didnt find anything where we expect Diad2 to be, find max position
    else:
        manual_diad2=True
        # Lets find the highest bit within this range
        diad2_range=(Diad[:, 0]>config.approx_diad2_pos[0]) & (Diad[:, 0]<config.approx_diad2_pos[1])
        diad2_height=np.max(Diad[:, 1][diad2_range])
        diad2_pos=Diad[:, 0][(Diad[:, 1]==diad2_height)&diad2_range]

        df_out=pd.DataFrame(data={'filename': filename,
                                    'Diad2_pos': diad2_pos[0] ,
                                'Diad2_height':diad2_height}, index=[0])

        # Lets try to find the hotband and hope its here!
        if any(df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]+diad2_HB2_min_offset, df_out['Diad2_pos'].iloc[0]+diad2_HB2_max_offset)):

            diad_2_HB=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]+diad2_HB2_min_offset, df_out['Diad2_pos'].iloc[0]+diad2_HB2_max_offset)]

            df_out['HB2_pos']=diad_2_HB['pos'].iloc[0]
            df_out['HB2_height']=diad_2_HB['height'].iloc[0]

        else:
            df_out['HB2_pos']=np.nan
            df_out['HB2_height']=np.nan

        # Lets try and find the C13 peak

        if any(df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]-diad2_C13_max_offset, df_out['Diad2_pos'].iloc[0]-diad2_C13_min_offset)):
            diad_2_C13=df_sort_diad2_trim.loc[df_sort_diad2_trim['pos'].between(df_out['Diad2_pos'].iloc[0]-diad2_C13_max_offset, df_out['Diad2_pos'].iloc[0]-diad2_C13_min_offset)]

            df_out['C13_pos']=diad_2_C13['pos'].iloc[0]
            df_out['C13_height']=diad_2_C13['height'].iloc[0]
        else:

            df_out['C13_pos']=np.nan
            df_out['C13_height']=np.nan



    # Do the same for diad 1

    right_pos_diad1=df_sort_diad1_trim['pos'].between(config.approx_diad1_pos[0], config.approx_diad1_pos[1])
    if any(right_pos_diad1):
        manual_diad1=False
        df_sort_diad1_rightpos=df_sort_diad1_trim.loc[right_pos_diad1]
        diad_1_diad=df_sort_diad1_rightpos.loc[df_sort_diad1_rightpos['height']==np.max(df_sort_diad1_rightpos['height'])]

        df_out['Diad1_pos']=diad_1_diad['pos'].iloc[0]
        df_out['Diad1_height']=diad_1_diad['height'].iloc[0]

        if any(df_sort_diad1_trim['pos'].between(diad_1_diad['pos'].iloc[0]-diad1_HB1_max_offset, diad_1_diad['pos'].iloc[0]-diad1_HB1_min_offset)):
            diad_1_HB=df_sort_diad1_trim.loc[df_sort_diad1_trim['pos'].between(diad_1_diad['pos'].iloc[0]-diad1_HB1_max_offset, diad_1_diad['pos'].iloc[0]-diad1_HB1_min_offset)]
            df_out['HB1_pos']=diad_1_HB['pos'].iloc[0]
            df_out['HB1_height']=diad_1_HB['height'].iloc[0]

        else:
            df_out['HB1_pos']=np.nan
            df_out['HB1_height']=np.nan


    else:
        manual_diad1=True
        diad1_range=(Diad[:, 0]>config.approx_diad1_pos[0]) & (Diad[:, 0]<config.approx_diad1_pos[1])
        diad1_height=np.max(Diad[:, 1][diad1_range])
        diad1_pos=Diad[:, 0][(Diad[:, 1]==diad1_height)&diad1_range]


        df_out['Diad1_pos']= diad1_pos[0]
        df_out['Diad1_height']=diad1_height

        # Lets see if we can find a hotband in here now
        if any(df_sort_diad1_trim['pos'].between(df_out['Diad1_pos'].iloc[0]-diad1_HB1_max_offset, df_out['Diad1_pos'].iloc[0]-diad1_HB1_min_offset)):
            diad_1_HB=df_sort_diad1_trim.loc[df_sort_diad1_trim['pos'].between(df_out['Diad1_pos'].iloc[0]-diad1_HB1_max_offset, df_out['Diad1_pos'].iloc[0]-diad1_HB1_min_offset)]
            df_out['HB1_pos']=diad_1_HB['pos'].iloc[0]
            df_out['HB1_height']=diad_1_HB['height'].iloc[0]

        else:
            df_out['HB1_pos']=np.nan
            df_out['HB1_height']=np.nan

    # Lets get the prominence of the main peaks and hotbands now we have found them
    Diad_x=Diad[:, 0]
    Diad_y=Diad[:, 1]
    Med_LHS_diad1=np.nanmedian(Diad_y[(Diad[:, 0]>config.LH_bck_diad1[0])& (Diad[:, 0]<config.LH_bck_diad1[1])])
    Med_RHS_diad1=np.nanmedian(Diad_y[(Diad[:, 0]>config.RH_bck_diad1[0])& (Diad[:, 0]<config.RH_bck_diad1[1])])
    Med_LHS_diad2=np.nanmedian(Diad_y[(Diad[:, 0]>config.LH_bck_diad2[0])& (Diad[:, 0]<config.LH_bck_diad2[1])])
    Med_RHS_diad2=np.nanmedian(Diad_y[(Diad[:, 0]>config.RH_bck_diad2[0])& (Diad[:, 0]<config.RH_bck_diad2[1])])
    #Med_central_back_diad2=np.nanmedian(Diad[(Diad[:, 0]>1300)& (Diad[:, 0]<1350)]

    Diad_diad1=Diad_y[(Diad[:, 0]>1260)& (Diad[:, 0]<1300)]
    Diad_diad2=Diad_y[(Diad[:, 0]>1380)& (Diad[:, 0]<1400)]
    Med_bck_diad1=(Med_LHS_diad1+Med_RHS_diad1)/2
    Med_bck_diad2=(Med_RHS_diad2+Med_LHS_diad2)/2

    df_out['Diad1_Median_Bck']=Med_bck_diad1
    df_out['Diad2_Median_Bck']=Med_bck_diad2

    df_out['Diad1_abs_prom']=df_out['Diad1_height']-Med_bck_diad1
    df_out['Diad2_abs_prom']=df_out['Diad2_height']-Med_bck_diad2
    df_out['Diad1_rel_prom']=df_out['Diad1_height']/Med_bck_diad1
    df_out['Diad2_rel_prom']=df_out['Diad2_height']/Med_bck_diad2
    df_out['HB1_abs_prom']=df_out['HB1_height']-Med_LHS_diad1
    df_out['HB2_abs_prom']=df_out['HB2_height']-Med_RHS_diad2
    df_out['HB1_rel_prom']=df_out['HB1_height']/Med_LHS_diad1
    df_out['HB2_rel_prom']=df_out['HB2_height']/Med_RHS_diad2

    df_out['approx_split']=df_out['Diad2_pos']-df_out['Diad1_pos']


    # Lets get the C13 prominence
    if any(df_out['C13_pos']>-500):
        C13_back=np.quantile(Diad_y[
        ((Diad_x>(df_out['C13_pos'].iloc[0]-3))&(Diad_x<(df_out['C13_pos'].iloc[0]+3)))
        ], 0.25)

        df_out['C13_abs_prom']=df_out['C13_height']-C13_back
        df_out['C13_rel_prom']=df_out['C13_height']/C13_back
        df_out['C13_HB2_abs_prom_ratio']=df_out['HB2_abs_prom']/df_out['C13_abs_prom']
    else:
        df_out['C13_abs_prom']=np.nan
        df_out['C13_rel_prom']=np.nan
        df_out['C13_HB2_abs_prom_ratio']=np.nan

    # Lets get the prominence of the valley between hotbands and the diad

    # Find the minimum intensity between the diad and HB

    if not np.isnan(df_out['HB2_pos'].iloc[0]):


        Diad2_between=Diad[(Diad[:, 0]<df_out['HB2_pos'].iloc[0])& (Diad[:, 0]>df_out['Diad2_pos'].iloc[0])]
        Min_y_midpoint2=np.min(Diad2_between[:, 1])

        # Option here
        RHS_x_Diad2=(df_out['HB2_pos'].iloc[0])+20
        RHS_y_Diad2=np.median(Diad[:, 1][
            (Diad[:, 0]<RHS_x_Diad2+2*spec_res)
            &(Diad[:, 0]>RHS_x_Diad2-2*spec_res)])

        df_out['Diad2_HB2_Valley_prom']=Min_y_midpoint2/RHS_y_Diad2
    else:

        df_out['Diad2_HB2_Valley_prom']=np.nan

    if not np.isnan(df_out['HB1_pos'].iloc[0]):
        Diad1_between=Diad[(Diad[:, 0]>df_out['HB1_pos'].iloc[0])& (Diad[:, 0]<df_out['Diad1_pos'].iloc[0])]
        Min_y_midpoint1=np.min(Diad1_between[:, 1])

        # option here seems to work better
        LHS_x_Diad1=(df_out['HB1_pos'].iloc[0])-20
        LHS_y_Diad1=np.median(Diad[:, 1][
            (Diad[:, 0]<LHS_x_Diad1+2.1*spec_res)
            &(Diad[:, 0]>LHS_x_Diad1-2.1*spec_res)])


        df_out['Diad1_HB1_Valley_prom']=Min_y_midpoint1/LHS_y_Diad1
    else:
        df_out['Diad1_HB1_Valley_prom']=np.nan
    # Divide by the median background position found above






    # Other useful params

    # Mean of HB elevation.
    df_out['Mean_Diad_HB_Valley_prom']=(df_out['Diad2_HB2_Valley_prom']+df_out['Diad1_HB1_Valley_prom'])/2

    # Mean HB prominence
    df_out['Mean_abs_HB_prom']=(df_out['HB1_abs_prom']+df_out['HB2_abs_prom'])

    df_out['Diad2_HB2_abs_prom_ratio']=df_out['Diad2_abs_prom']/df_out['HB2_abs_prom']
    df_out['Diad1_HB1_abs_prom_ratio']=df_out['Diad1_abs_prom']/df_out['HB1_abs_prom']
    # Average these
    df_out['Av_Diad_HB_prom_ratio']=(df_out['Diad2_HB2_abs_prom_ratio']+df_out['Diad1_HB1_abs_prom_ratio'])/2
    # Parameter for amount of noise between diads vs. height of peaks

    between_diads_x=(x>diad_id_config.RH_bck_diad1[0])&(x<diad_id_config.LH_bck_diad2[1])
    std_bet_diad=np.std(y[between_diads_x])

    df_out['Diad1_prom/std_betweendiads']=df_out['Diad1_abs_prom']/std_bet_diad
    df_out['Diad2_prom/std_betweendiads']=df_out['Diad2_abs_prom']/std_bet_diad
    df_out['HB1_prom/std_betweendiads']=df_out['HB1_abs_prom']/std_bet_diad
    df_out['HB2_prom/std_betweendiads']=df_out['HB2_abs_prom']/std_bet_diad

    # New ones
    df_out['Av_Diad_prom/std_betweendiads']=(df_out['Diad1_prom/std_betweendiads']+df_out['Diad2_prom/std_betweendiads'])/2
    df_out['C13_prom/HB2_prom']=df_out['C13_abs_prom']/df_out['HB2_abs_prom']

    df_out['Left_vs_Right']=Med_LHS_diad1/Med_RHS_diad2



    # Lets sort based on columns we want near each other
    cols_to_move = ['filename', 'approx_split',
    'Diad1_pos', 'Diad2_pos','HB1_pos', 'HB2_pos', 'C13_pos',
    'Diad1_abs_prom', 'Diad2_abs_prom', 'HB1_abs_prom', 'HB2_abs_prom', 'C13_abs_prom',
    'Mean_abs_HB_prom', 'Diad2_HB2_abs_prom_ratio', 'Diad1_HB1_abs_prom_ratio',
    'Diad1_rel_prom', 'Diad2_rel_prom', 'HB1_rel_prom', 'HB2_rel_prom', 'C13_rel_prom',
    'Diad1_HB1_Valley_prom',
    'Mean_Diad_HB_Valley_prom',
    'Diad1_prom/std_betweendiads', 'Diad2_prom/std_betweendiads',
    'Av_Diad_prom/std_betweendiads', 'C13_prom/HB2_prom', 'Av_Diad_HB_prom_ratio', 'Left_vs_Right']

    df_out = df_out[cols_to_move + [
        col for col in df_out.columns if col not in cols_to_move]]
    # Now lets get approximate strength subtracting the backgrround

    # Now lets make a plot
    if plot_figure is True:
        fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(12,4))

        ax0.plot(Diad[:, 0], Diad[:, 1], '-r')
        ax0.plot(df['pos'], df['height'], '*k')
        ax1.plot(df['pos'], df['height'], '*k', label='All Scipy Peaks')
        ax2.plot(df['pos'], df['height'], '*k')
        if manual_diad1 is True:
            ax1.plot(diad1_pos[0], diad1_height, 'dk', mfc='yellow', ms=7, label='SciPyMissed')

        if manual_diad2 is True:
            ax1.plot(diad2_pos[0], diad2_height, 'dk', mfc='yellow', ms=7)
        #ax0.legend()
        ax1.set_title('Diad1')
        ax1.plot(Diad[:, 0],Diad[:, 1], '-r')
        ax1.set_xlim([config.Diad1_window[0], config.Diad1_window[1]])
        ax2.set_title('Diad2')
        ax2.plot(Diad[:, 0],Diad[:, 1], '-r')
        ax2.set_xlim([config.Diad2_window[0], config.Diad2_window[1]])
        #ax0.set_ylim[np.nanmin(Diad[:, 1]), np.nanmax(Diad[:, 1]) ])
        fig.tight_layout()
        ax2.plot(df_sort_diad2_trim['pos'], df_sort_diad2_trim['height'], '*k', mfc='yellow', ms=10)
        ax1.plot(df_sort_diad1_trim['pos'], df_sort_diad1_trim['height'], '*k',  mfc='yellow', ms=10, label='Selected Pks')
        ax1.legend()
        ax0.set_xlabel('Wavenumber')
        ax0.set_ylabel('Intensity')
        ax1.set_xlabel('Wavenumber')
        ax1.set_ylabel('Intensity')
        ax2.set_xlabel('Wavenumber')
        ax2.set_ylabel('Intensity')

        for i in range(0, len(df_sort_diad1_trim)):
            ax1.annotate(str(np.round(df_sort_diad1_trim['pos'].iloc[i], 1)), xy=(df_sort_diad1_trim['pos'].iloc[i]-10,
            df_sort_diad1_trim['height'].iloc[i]-1/7*(df_sort_diad1_trim['height'].iloc[i]-700)), xycoords="data", fontsize=10, rotation=90)

        for i in range(0, len(df_sort_diad2_trim)):
            ax2.annotate(str(np.round(df_sort_diad2_trim['pos'].iloc[i], 1)), xy=(df_sort_diad2_trim['pos'].iloc[i]-10,
            df_sort_diad2_trim['height'].iloc[i]-1/7*(df_sort_diad2_trim['height'].iloc[i]-700)), xycoords="data", fontsize=10, rotation=90)


    return df_out, Diad, fig if 'fig' in locals() else None











[docs]
def loop_approx_diad_fits(*, spectra_path, config, Diad_Files, filetype, plot_figure):
    """ Uses the identify_diad_peaks function to loop over all files.
    Parameters
    ----------------
    config: dataclass
        See documentation for identify_diad_peaks

    spectra_path: str
        Path where spectra is stored

    Diad_Files: list
        List of file names to loop over

    filetype: str
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    plot_figure: bool
        if True, plots a figure of peaks, and the identified peaks that Scipy found

    Returns
    -----------
    pd.DataFrame of the approximate peak parameters for each file.

    np.array of y coordinates of every spectra.


    """

    # Do fit for first file to get length
    df_peaks, Diad, fig=identify_diad_peaks(
    config=config, path=spectra_path, filename=Diad_Files[0],
    filetype=filetype, plot_figure=plot_figure)

    # Now do for all files
    fit_params = pd.DataFrame([])
    x_cord=Diad[:, 0]
    data_y_all=np.zeros([  len(x_cord), len(Diad_Files)], float)
    data_x_all=np.zeros([  len(x_cord), len(Diad_Files)], float)
    i=0
    for file in tqdm(Diad_Files):


        df_peaks, Diad, fig=identify_diad_peaks(
        config=config, path=spectra_path, filename=file,
    filetype=filetype, plot_figure=plot_figure)

        data_y_all[:, i]=Diad[:, 1]
        data_x_all[:, i]=Diad[:, 0]
        #data = pd.concat([Diad, data], axis=0)
        fit_params = pd.concat([fit_params, df_peaks], axis=0)
        i=i+1

    fit_params=fit_params.reset_index(drop=True)
    # Lets check all data x are the same.
    a=data_x_all
    same = np.all(a[0, :] == a[0, 0])

    if same==True:

        return fit_params, data_y_all
    if same==False:
        print('You do not have the same x axis for all spectra. ')
        diff_val_row = np.where(np.any(a != a[0], axis=1))[0]
        print(diff_val_row)


        return fit_params, data_y_all





[docs]
def plot_peak_params(fit_params,
                     x_param='Diad1_pos',  y1_param='approx_split',
                    y2_param='Mean_Valley_prom', y3_param='C13_abs_prom',
                    y4_param='HB2_abs_prom', fill_na=0):

    """ Makes a 2X2 figure of fit parameters, with the same x axis, and 4 different y axis

    Parameters
    -----------

    fit_params: Pandas DataFrame
        dataframe of approximate fit params from function loop_approx_diad_fits
    x_param: str
        parameter you want on the x axis of all plots
    y1_param: str
        parameter on y axis of 1st subplot (top left)
    y2_param: str
        parameter on y axis of 2nd subplot (top right)
    y3_param: str
        parameter on y axis of 3rd subplot (bottom left)
    y4_param: str
        parameter on y axis of 4th subplot (bottom right)
    fill_na: int
        The integer value to fill Nans with to show up on plots.

    Returns
    ----------
    4 part figure

    """
    fit_params_nona=fit_params.fillna(fill_na)

    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10*0.8,8*0.8))


    ax1.plot(fit_params_nona[x_param], fit_params_nona[y1_param],
        'xr')
    ax1.set_xlabel(x_param)
    ax1.set_ylabel(y1_param)

    ax2.plot(fit_params_nona[x_param], fit_params_nona[y2_param],
        'xr')
    ax2.set_xlabel(x_param)
    ax2.set_ylabel(y2_param)
    fig.tight_layout()

    ax3.plot(fit_params_nona[x_param], fit_params_nona[y3_param],
        'xr')
    ax3.set_xlabel(x_param)
    ax3.set_ylabel(y3_param)

    ax4.plot(fit_params_nona[x_param], fit_params_nona[y4_param],
        'xr')
    ax4.set_xlabel(x_param)
    ax4.set_ylabel(y4_param)
    fig.tight_layout()

    return fig




[docs]
def filter_splitting_prominence(*, fit_params, data_y_all,
                                x_cord,
                                splitting_limits=[100, 107],
                                lower_diad1_prom=10, exclude_str=None, str_filt=None):
    """ Filters Spectra based on approximate splitting, draws a plot showing spectra to discard and those to keep

    Parameters
    --------------
    fit_params: pd.dataframe
        dataframe of fit parameters from loop_approx_diad_fits

    data_y_all: np.array
        y coordinates of each spectra from loop_approx_diad_fits, used for plotting visualizatoins

    x_cord: np.array
        x coordinates of 1 spectra. Assumes all x coordinates the same length

    splitting_limits: list (e.g. [100, 107])
        only keeps spectra with splitting in this range

    lower_diad1_prom: int, float
        Only keeps spectra that meet the splitting parameter, and have an absolute
        diad1 prominence greater than this value (helps filter out other weird spectra)

    exclude_str: str
        Excludes files with this string.

    str_filt: str
        Filters just based on string in filename

    Returns
    --------------
    fit_params_filt: pd.DataFrame
        dataframe of fit parameters for spectra to keep
    data_y_filt: np.array
        y coordinates of spectra to keep
    fit_params_disc: pd.DataFrame
        dataframe of fit parameters for spectra to discard
    data_y_disc: np.array
        y coordinates of spectra to discard


    """
    if str_filt is not None:
        filt=fit_params['filename'].str.contains(str_filt)

    else:

        reas_split=(fit_params['approx_split'].between(splitting_limits[0], splitting_limits[1]))
        reas_heigh=fit_params['Diad1_abs_prom']>lower_diad1_prom

        if exclude_str is not None:
            name_in_file=~fit_params['filename'].str.contains(exclude_str)
        else:
            name_in_file=reas_heigh

        filt=reas_split&reas_heigh&name_in_file

    fit_params_filt=fit_params.loc[filt].reset_index(drop=True)
    fit_params_disc=fit_params.loc[~(filt)].reset_index(drop=True)

    print('Keeping N='+str(len(fit_params_filt)))
    print('Discarding N='+str(len(fit_params_disc)))



    # Then apply to get data
    data_y_filt=data_y_all[:, (filt)]
    data_y_disc=data_y_all[:, ~(filt)]

    intc=800
    prom_filt=0
    prom_disc=0
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,5))
    ax1.set_title('Spectra to Discard')
    ax2.set_title('Spectra to Keep')
    if sum(~filt)>0:
        for i in range(0, np.shape(data_y_disc)[1]):
            av_prom_disc=np.abs(np.nanmedian(fit_params_disc['Diad1_abs_prom'])/intc)
            Diff=np.nanmax(data_y_disc[:, i])-np.nanmin(data_y_disc[:, i])
            av_prom_Keep=fit_params_disc['Diad1_abs_prom'].iloc[i]
            prom_disc=prom_disc+av_prom_disc
            ax1.plot(x_cord+i*5, (data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, '-r', lw=0.5)
            yplot=np.quantile((data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, 0.65)
            file=fit_params_disc['filename'].iloc[i]
            file = file.replace("_CRR_DiadFit", "")
            ax1.annotate(str(file), xy=(1450+i*5, yplot),
                    xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))

        ax1.set_xlim([1250, 1500+i*5])
        ax1.set_xticks([])
        ax1.set_yticks([])
    if sum(filt)>0:
        for i in range(0, np.shape(data_y_filt)[1]):
            Diff=np.nanmax(data_y_filt[:, i])-np.nanmin(data_y_filt[:, i])
            av_prom_Keep=fit_params_filt['Diad1_abs_prom'].iloc[i]
            prom_filt=prom_filt+av_prom_Keep
            file=fit_params_filt['filename'].iloc[i]
            ax2.plot(x_cord+i*5, (data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, '-b', lw=0.5)
            yplot=np.quantile((data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, 0.65)
            ax2.annotate(str(file), xy=(1450+i*5, yplot),
                    xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))


        ax2.set_xlim([1250, 1450+i*5])
        ax2.set_xticks([])
        ax2.set_yticks([])

    return fit_params_filt.reset_index(drop=True), data_y_filt, fit_params_disc.reset_index(drop=True), data_y_disc



[docs]
def filter_by_string(*, fit_params, data_y_all,
                                x_cord,
                                 str_filt=None):
    """ Filters Spectra based on approximate splitting, draws a plot showing spectra to discard and those to keep

    Parameters
    --------------
    fit_params: pd.dataframe
        dataframe of fit parameters from loop_approx_diad_fits

    data_y_all: np.array
        y coordinates of each spectra from loop_approx_diad_fits, used for plotting visualizatoins

    x_cord: np.array
        x coordinates of 1 spectra. Assumes all x coordinates the same length



    str_filt: str
        Filters just based on string in filename. Keeps files with string in, discards those without.

    Returns
    --------------
    fit_params_filt: pd.DataFrame
        dataframe of fit parameters for spectra to keep
    data_y_filt: np.array
        y coordinates of spectra to keep
    fit_params_disc: pd.DataFrame
        dataframe of fit parameters for spectra to discard
    data_y_disc: np.array
        y coordinates of spectra to discard


    """

    filt=fit_params['filename'].str.contains(str_filt)


    fit_params_filt=fit_params.loc[filt].reset_index(drop=True)
    fit_params_disc=fit_params.loc[~(filt)].reset_index(drop=True)

    print('Keeping N='+str(len(fit_params_filt)))
    print('Discarding N='+str(len(fit_params_disc)))



    # Then apply to get data
    data_y_filt=data_y_all[:, (filt)]
    data_y_disc=data_y_all[:, ~(filt)]

    intc=800
    prom_filt=0
    prom_disc=0
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,5))
    ax1.set_title('Samples')
    ax2.set_title('Standards')
    if sum(~filt)>0:
        for i in range(0, np.shape(data_y_disc)[1]):
            av_prom_disc=np.abs(np.nanmedian(fit_params_disc['Diad1_abs_prom'])/intc)
            Diff=np.nanmax(data_y_disc[:, i])-np.nanmin(data_y_disc[:, i])
            av_prom_Keep=fit_params_disc['Diad1_abs_prom'].iloc[i]
            prom_disc=prom_disc+av_prom_disc
            ax1.plot(x_cord+i*5, (data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, '-r', lw=0.5)
            yplot=np.quantile((data_y_disc[:, i]-np.nanmin(data_y_disc[:, i]))/Diff+i/3, 0.65)
            file=fit_params_disc['filename'].iloc[i]
            file = file.replace("_CRR_DiadFit", "")
            ax1.annotate(str(file), xy=(1450+i*5, yplot),
                    xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))

        ax1.set_xlim([1250, 1500+i*5])
        ax1.set_xticks([])
        ax1.set_yticks([])
    if sum(filt)>0:
        for i in range(0, np.shape(data_y_filt)[1]):
            Diff=np.nanmax(data_y_filt[:, i])-np.nanmin(data_y_filt[:, i])
            av_prom_Keep=fit_params_filt['Diad1_abs_prom'].iloc[i]
            prom_filt=prom_filt+av_prom_Keep
            file=fit_params_filt['filename'].iloc[i]
            ax2.plot(x_cord+i*5, (data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, '-b', lw=0.5)
            yplot=np.quantile((data_y_filt[:, i]-np.nanmin(data_y_filt[:, i]))/Diff+i/3, 0.65)
            ax2.annotate(str(file), xy=(1450+i*5, yplot),
                    xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))


        ax2.set_xlim([1250, 1450+i*5])
        ax2.set_xticks([])
        ax2.set_yticks([])

    return fit_params_filt.reset_index(drop=True), data_y_filt, fit_params_disc.reset_index(drop=True), data_y_disc




[docs]
def identify_diad_group(*, fit_params, data_y,  x_cord, filter_bool,y_fig_scale=0.1, grp_filter='Weak', str_filt=None):

    """Sorts diads into two groups. Those meeting the 'filter_bool' criteria, and those not
     meeting this criteria. Ones meeting the criteria are shown on the left hand plot,
    ones not meeting the criteria are shown in the right hand plot

    Parameters
    -----------------
    fit_params: pd.DataFrame
        dataframe of approximate peak parameters from loop_approx_diad_fits

    data_y: np.array
        y coordinates of each file in fit_params

    x_cord: np.array
        x coordinate of 1 spectra. Assumes all spectra have the same xcoordinate

    filter_bool: Pandas.series of bool (e.g. True, False, True, False)
        This is the filter to use to subdivide spectra into two groups.

    y_fig_scale: float, int (0.1)
        Determins how far apart the spectra are displayed on the plot. Larger number, the longer the plot is
        (and easier spectra are to see). The total height of the figure is 2+y_fig_scale*number of spectra

    grp_filter: str
        The name of the group where the filter_bool is true. E.g. if you apply a filter to get the weak diads,
        enter 'Weak' here. If filter_bool is to identify Strong spectra, enter 'Medium-Strong' here.

    Returns
    -----------------
    pd.DataFrame of fit parameters where filter_bool is True
    pd.DataFrame of fit parameters where filter_bool is False
    np.array of y coordinates where filter_bool is True
    np.array of y coordinates where filter_bool is False




    """
    if str_filt is not None:
        filt_name=~fit_params['filename'].str.contains(str_filt)
        filt_bool=filt_name&filt_bool

    if np.shape(data_y)[1]==0:
        Group1_df=pd.DataFrame().reindex_like(fit_params)
        Groupnot1_df=pd.DataFrame().reindex_like(fit_params)
        Group1_np_y=np.zeros(0, dtype='float')
        Groupnot1_np_y=np.zeros(0, dtype='float')
        return Group1_df, Groupnot1_df, Group1_np_y, Groupnot1_np_y

    else:
        # Grp1 is where the bool is true.

        grp1=filter_bool

        fit_params_notgrp1=fit_params.loc[~grp1]


        # Find ones in group1, in dataframe and numpy form
        Group1_df=fit_params.loc[grp1]#.reset_index(drop=True)
        Group1_df_reset=fit_params.loc[grp1].reset_index(drop=True)
        index_Grp1=Group1_df.index
        Group1_np_y=data_y[:, index_Grp1]

        # Ones not in group1
        Groupnot1_df=fit_params.loc[~grp1]#.reset_index(drop=True)
        Groupnot1_df_reset=fit_params.loc[~grp1].reset_index(drop=True)
        index_Grpnot1=Groupnot1_df.index
        Groupnot1_np_y=data_y[:, index_Grpnot1]


        fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(12, 2+y_fig_scale*len(fit_params)))
        intc=8


        #
        if sum(grp1)>0:



            if sum(grp1)==1:
                i=0
                av_prom_disc=np.abs(np.nanmedian(Group1_df['Diad1_abs_prom'])/intc)
                Diff=np.nanmax(Group1_np_y[:, i])-np.nanmin(Group1_np_y[:, i])
                ax0.plot(x_cord-i*5, (Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, '-r', lw=0.5)
                yplot=np.quantile((Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, 0.65)
                file=Group1_df_reset['filename'].iloc[i]
                file = file.replace("_CRR_DiadFit", "")
                ax0.annotate(str(file), xy=(1450-i*5, yplot),
                    xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))


            else:
                for i in range(0, np.shape(Group1_np_y)[1]):



                    av_prom_disc=np.abs(np.nanmedian(Group1_df['Diad1_abs_prom'])/intc)
                    Diff=np.nanmax(Group1_np_y[:, i])-np.nanmin(Group1_np_y[:, i])
                    ax0.plot(x_cord-i*5, (Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, '-r', lw=0.5)

                    yplot=np.quantile((Group1_np_y[:, i]-np.nanmin(Group1_np_y[:, i]))/Diff+i/3, 0.5)
                    file=Group1_df_reset['filename'].iloc[i]
                    file = file.replace("_CRR_DiadFit", "")
                    ax0.annotate(str(file), xy=(1450-i*5, yplot),
                        xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))
            ax0.set_xlim([1250-i*5, 1550])
            ax0.set_xticks([])
            ax0.set_yticks([])



            # av_prom_Group1=np.abs(np.nanmedian(Group1_df[x_param])/intc)
            # ax0.plot(x_cord, Group1_np_y[:, i]+av_prom_Group1*i, '-r')

        if sum(~grp1)>0:


            if sum(~grp1)==1:
                j=0
                av_prom_disc=np.abs(np.nanmedian(Groupnot1_df['Diad1_abs_prom'])/intc)
                Diff=np.nanmax(Groupnot1_np_y[:, j])-np.nanmin(Groupnot1_np_y[:, j])
                ax1.plot(x_cord-j*5,
                (Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, '-k', lw=0.5)
                yplot=np.quantile((Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, 0.65)
                file=Groupnot1_df_reset['filename'].iloc[j]
                file = file.replace("_CRR_DiadFit", "")
                ax1.annotate(str(file), xy=(1450-j*5, yplot),
                    xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))


            else:

                for j in range(0, np.shape(Groupnot1_np_y)[1]):

                    av_prom_disc=np.abs(np.nanmedian(Groupnot1_df['Diad1_abs_prom'])/intc)
                    Diff=np.nanmax(Groupnot1_np_y[:, j])-np.nanmin(Groupnot1_np_y[:, j])
                    ax1.plot(x_cord-j*5,
                    (Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, '-k', lw=0.5)
                    yplot=np.quantile((Groupnot1_np_y[:, j]-np.nanmin(Groupnot1_np_y[:, j]))/Diff+j/3, 0.5)
                    file=Groupnot1_df_reset['filename'].iloc[j]
                    file = file.replace("_CRR_DiadFit", "")
                    ax1.annotate(str(file), xy=(1450-j*5, yplot),
                        xycoords="data", fontsize=8, bbox=dict(facecolor='white', edgecolor='none', pad=2))

            ax1.set_xlim([1250-j*5, 1550])
            ax1.set_xticks([])
            ax1.set_yticks([])



        if grp_filter=='Medium-Strong':
            ax0.set_title('Classified as Strong')
            ax1.set_title('Classified as Medium')
        if grp_filter=='Weak':
            ax0.set_title('Classified as Weak')
            ax1.set_title('Ones left (not classified yet)')


        plt.subplots_adjust(wspace=0)

        return Group1_df.reset_index(drop=True), Groupnot1_df.reset_index(drop=True),Group1_np_y, Groupnot1_np_y





[docs]
def plot_diad_groups(*, x_cord,  Weak_np=None, Medium_np=None, Strong_np=None, y_fig_scale=0.5):
    """
    This makes a figure showing spectra classified into Weak, Medium and Strong groups on a 3 panel plot

    Parameters
    --------------
    x_cord: np.array
        x_coordinate for all spectra (must be the same)

    Weak_np: np.array
        y coordinates of spectra classified as weak

    Medium_np: np.array
        y coordinates of spectra classified as medium

    Strong_np: np.array
        y coordinates of spectra classified as strong

    y_fig_scale: float
        The total height of the figure is 2+y_fig_scale*total number of spectra entered.


    Returns
    -------------
    figure showing 3 diad groups.




    """


    #
    if len(Weak_np)>0:
        Num_Weak=np.shape(Weak_np)[1]
    else:
        Num_Weak=0
    if len(Medium_np)>0:
        Num_Medium=np.shape(Medium_np)[1]
    else:
        Num_Medium=0
    if len(Strong_np)>0:
        Num_Strong=np.shape(Strong_np)[1]
    else:
        Num_Strong=0


    Total=Num_Strong+Num_Medium+Num_Weak
    fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(15,2+y_fig_scale*Total))
    if Num_Weak>0:
        for i in range(0, np.shape(Weak_np)[1]):
            Diff=np.nanmax(Weak_np[:, i])-np.nanmin(Weak_np[:, i])
            ax0.plot(x_cord-i*5, (Weak_np[:, i]-np.nanmin(Weak_np[:, i]))/Diff+i/3, '-r', lw=0.5)
        ax0.set_xlim([1250-i*5, 1450])
        ax0.set_xticks([])
        ax0.set_yticks([])

    if Num_Medium>0:
        for i in range(0, np.shape(Medium_np)[1]):
            Diff=np.nanmax(Medium_np[:, i])-np.nanmin(Medium_np[:, i])
            ax1.plot(x_cord-i*5, (Medium_np[:, i]-np.nanmin(Medium_np[:, i]))/Diff+i/3, '-b', lw=0.5)
        ax1.set_xlim([1250-i*5, 1450])
        ax1.set_xticks([])
        ax1.set_yticks([])

    if Num_Strong>0:
        for i in range(0, np.shape(Strong_np)[1]):
            Diff=np.nanmax(Strong_np[:, i])-np.nanmin(Strong_np[:, i])
            ax2.plot(x_cord-i*5, (Strong_np[:, i]-np.nanmin(Strong_np[:, i]))/Diff+i/3, '-g', lw=0.5)
        ax2.set_xlim([1250-i*5, 1450])
        ax2.set_xticks([])
        ax2.set_yticks([])

    ax0.set_title('Weak, N='+str(Num_Weak))
    ax1.set_title('Medium, N='+str(Num_Medium))
    ax2.set_title('Strong, N='+str(Num_Strong))
    plt.subplots_adjust(wspace=0)

    return fig




[docs]
def remove_diad_baseline(*, path=None, filename=None, Diad_files=None, filetype='Witec_ASCII', diad_array=None,
            exclude_range1=None, exclude_range2=None,N_poly=1, x_range_baseline=10,
            lower_bck=[1200, 1250], upper_bck=[1320, 1330], sigma=4,
            plot_figure=True):
    """ This function fits an Nth degree polynomial fitted through spectra points between two background positions. It returns baseline-subtracted data

    Parameters
    -----------
    path: str
        Folder where spectra is stored

    filename: str
        Specific file being read

    OR

    Diad_files: str
        Filename if in same folder as script.

    filetype: str
        Identifies type of file. choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    exclude_range1: None or list length 2
        Excludes a region, e.g. a cosmic ray

    exclude_range2: None or list length 2
        Excludes a region, e.g. a cosmic ray

    sigma: int
        Filters out points that are more than this number of sigma from the median of the baseline.

    N_poly: int
        Degree of polynomial to fit to the backgroun

    lower_bck: list len 2
        wavenumbers of baseline region to the left of the peak of interest

    upper_bck: list len 2
        wavenumbers to the right of the peak of interest


    plot_figure: bool
        if True, plots figure, if False, doesn't.

    Returns
    -----------
    y_corr, Py_base, x,  Diad_short, Py_base, Pf_baseline, Baseline_ysub, Baseline_x, Baseline, span

        y_corr: Background subtracted y values trimmed in baseline range
        x: initial x values trimmed in baseline range
        Diad_short: Initial data (x and y) trimmed in baseline range
        Py_base: Fitted baseline for trimmed x coordinates
        Pf_baseline: polynomial model fitted to baseline
        Baseline_ysub: Baseline fitted for x coordinates in baseline
        Baseline_x: x co-ordinates in baseline.
        Baseline: Filtered to remove points outside sigma*std dev of baseline
        span: lower and upper x coordinate of region that isnt the baseline



    """

    if diad_array is None:
        Diad_df=get_data(path=path, filename=filename, filetype=filetype)
        Diad=np.array(Diad_df)
    else:
        Diad=diad_array



    if exclude_range1 is not None and exclude_range2 is None:
        Diad_old=Diad.copy()
        Diad=Diad[(Diad[:, 0]<exclude_range1[0])|(Diad[:, 0]>exclude_range1[1])]
        Discard=Diad_old[(Diad_old[:, 0]>=exclude_range1[0]) & (Diad_old[:, 0]<=exclude_range1[1])]


    if exclude_range2 is not None and exclude_range1 is None:
        Diad_old=Diad.copy()
        Diad=Diad[(Diad[:, 0]<exclude_range2[0])|(Diad[:, 0]>exclude_range2[1])]

        Discard=Diad_old[(Diad_old[:, 0]>=exclude_range2[0]) & (Diad_old[:, 0]<=exclude_range2[1])]

    if exclude_range1 is not None and exclude_range2 is not None:
        Diad_old=Diad.copy()
        Diad=Diad[
        ((Diad[:, 0]<exclude_range1[0])|(Diad[:, 0]>exclude_range1[1]))
        &
        ((Diad[:, 0]<exclude_range2[0])|(Diad[:, 0]>exclude_range2[1]))
        ]

        Discard=Diad_old[
        ((Diad_old[:, 0]>=exclude_range1[0]) & (Diad_old[:, 0]<=exclude_range1[1]))
        |
        ((Diad_old[:, 0]>=exclude_range2[0]) & (Diad_old[:, 0]<=exclude_range2[1]))
        ]


    lower_0baseline=lower_bck[0]
    upper_0baseline=lower_bck[1]
    lower_1baseline=upper_bck[0]
    upper_1baseline=upper_bck[1]
    # Bit that is actually peak, not baseline
    span=[upper_0baseline, lower_1baseline]

    # lower_2baseline=1320
    # upper_2baseline=1330

    # Trim for entire range
    Diad_short=Diad[ (Diad[:,0]>lower_0baseline) & (Diad[:,0]<upper_1baseline) ]

    # Get actual baseline
    Baseline_with_outl=Diad_short[
    ((Diad_short[:, 0]<upper_0baseline) &(Diad_short[:, 0]>lower_0baseline))
         |
    ((Diad_short[:, 0]<upper_1baseline) &(Diad_short[:, 0]>lower_1baseline))]

    # Calculates the median for the baseline and the standard deviation
    Median_Baseline=np.mean(Baseline_with_outl[:, 1])
    Std_Baseline=np.std(Baseline_with_outl[:, 1])

    # Removes any points in the baseline outside of 2 sigma (helps remove cosmic rays etc).
    Baseline=Baseline_with_outl[(Baseline_with_outl[:, 1]<Median_Baseline+sigma*Std_Baseline)
                                &
                                (Baseline_with_outl[:, 1]>Median_Baseline-sigma*Std_Baseline)
                               ]


    #Baseline=Baseline_with_outl




        # Fits a polynomial to the baseline of degree
    # Fits a polynomial to the baseline of degree
    Pf_baseline = np.poly1d(np.polyfit(Baseline[:, 0], Baseline[:, 1], N_poly))
    Py_base = Pf_baseline(Diad_short[:, 0])

    Baseline_ysub = Pf_baseline(Baseline[:, 0])
    Baseline_x = Baseline[:, 0]
    y_corr = Diad_short[:, 1] - Py_base
    x = Diad_short[:, 0]

    # Now filter out all the points I dont want in my background subtracted data

    # 1) points rejected by the sigma filter (from the baseline-candidate set)
    sigma_rejected_x = Baseline_with_outl[
        (Baseline_with_outl[:, 1] >= Median_Baseline + sigma * Std_Baseline) |
        (Baseline_with_outl[:, 1] <= Median_Baseline - sigma * Std_Baseline)
    ][:, 0]

    drop = np.zeros(len(x), dtype=bool)

    # 2) points in user-specified exclude ranges (if provided)
    if exclude_range1 is not None:
        drop |= (x >= exclude_range1[0]) & (x <= exclude_range1[1])
    if exclude_range2 is not None:
        drop |= (x >= exclude_range2[0]) & (x <= exclude_range2[1])

    # 3) points sigma-filtered out of the baseline windows
    if sigma_rejected_x.size > 0:
        drop |= np.isin(x, sigma_rejected_x)

    # apply the mask consistently to all aligned arrays/outputs
    keep = ~drop
    x = x[keep]
    y_corr = y_corr[keep]
    Py_base = Py_base[keep]
    Diad_short = Diad_short[keep, :]





     # Plotting what its doing
    if plot_figure is True:

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4))
        ax1.set_title('Background fit')
        ax1.plot(Diad[:, 0], Diad[:, 1], '-', color='grey')
        ax1.plot(Diad_short[:, 0], Diad_short[:, 1], '-r', label='region_2_bck_sub')

        #ax1.plot(Baseline[:, 0], Baseline[:, 1], '-b', label='Bck points')
        ax1.plot(Baseline[:, 0], Baseline[:, 1], '.b', label='Bck points')
        ax1.plot(Diad_short[:, 0], Py_base, '-k')



        ax1_ymin=np.min(Baseline[:, 1])-10*np.std(Baseline[:, 1])
        ax1_ymax=np.max(Baseline[:, 1])+10*np.std(Baseline[:, 1])
        ax1_xmin=lower_0baseline-30
        ax1_xmax=upper_1baseline+30
        # Adding patches


        rect_diad1_b1=patches.Rectangle((lower_0baseline, ax1_ymin),upper_0baseline-lower_0baseline,ax1_ymax-ax1_ymin,
                              linewidth=1,edgecolor='none',facecolor='cyan', label='bck', alpha=0.3, zorder=0)
        ax1.add_patch(rect_diad1_b1)
        rect_diad1_b2=patches.Rectangle((lower_1baseline, ax1_ymin),upper_1baseline-lower_1baseline,ax1_ymax-ax1_ymin,
                              linewidth=1,edgecolor='none',facecolor='cyan', alpha=0.3, zorder=0)
        ax1.add_patch(rect_diad1_b2)
        ax1.set_xlim([ax1_xmin, ax1_xmax])
        ax1.set_ylim([ax1_ymin, ax1_ymax])

        ax1.set_ylabel('Intensity')
        ax2.set_ylabel('Intensity')
        ax2.set_xlabel('Wavenumber')
        ax1.legend()



        ax2.set_title('Background subtracted')
        ax2.plot(x, y_corr, '-r')
        height_p=np.max(Diad_short[:, 1])-np.min(Diad_short[:, 1])
        ax2.set_ylim([np.min(y_corr), 1.2*height_p ])
        ax1.set_xlabel('Wavenumber')


    return y_corr, Py_base, x,  Diad_short, Py_base, Pf_baseline, Baseline_ysub, Baseline_x, Baseline, span





[docs]
def add_peak(*, prefix=None, center=None, model_name='VoigtModel',
min_cent=None, max_cent=None, min_sigma=None, max_sigma=None, amplitude=100, min_amplitude=None, max_amplitude=None, sigma=0.2):
    """
    This function iteratively adds a peak using lmfit, used for iteratively fitting HB, C13 etc.

    Parameters
    --------------
    prefix: str
        Prefix added to peak name, e.g. 'diad1', so it reads diad1_center, diad1_sigma etc.

    center: float, int
        Estimate of peak center (e.g. x coordinate of peak)

    sigma: float, int (default 0.2)
        Estimate of sigma of peak fit

    amplitude: float, int
        Estimate of amplitude of peakf it


    model_name: str 'VoigtModel' or 'PseudoVoigtModel'
        Type of peak which is being added.

    Optional:

    min_cent, max_cent: float
        Minimum and maximum x coordinate to place peak center at

    min_sigma, max_sigma: float
        Minimum and maximum sigma of peak fit

    min_amplitude, max_amplitude: float
        Minimum and maximum sigma of peak fit

    Returns
    ---------------

    peak: Fitted model
    params: Peak fit parameters

    """
    Model_combo = globals()[model_name](prefix=prefix)

    peak =  Model_combo
    pars = peak.make_params()

    if min_cent is not None and max_cent is not None:
        pars[prefix + 'center'].set(center, min=min_cent, max=max_cent)
    else:
        pars[prefix + 'center'].set(center)


    pars[prefix + 'amplitude'].set(amplitude, min=min_amplitude, max=max_amplitude)

    if min_sigma is not None:
        pars[prefix+'sigma'].set(sigma, max=max_sigma)

    if min_sigma is not None and max_sigma is not None:
        pars[prefix+'sigma'].set(sigma, max=max_sigma, min=min_sigma)

    else:
        pars[prefix + 'sigma'].set(sigma, min=0)


    return peak, pars







## Overall function for fitting diads in 1 single step

[docs]
@dataclass
class diad1_fit_config:
    """
    Configuration class for fitting Diad 1

    This class stores configuration parameters for fitting Diad 1 and associated peaks.
    The class allows for easy customization of the fitting process.

    Attributes:
        model_name (str): Default 'PseudoVoigtModel'.
            Model name for peak fitting. Choose from
            'PseudoVoigtModel', 'VoigtModel', 'GaussianModel', 'LorentzianModel', etc.

        fit_peaks (int): Default 2
            Number of peaks to fit. 2 = Diad + HB, 1 = Diad.

        N_poly_bck_diad1 (float): Default = 1
            Degree of polynomial for background subtraction. Default 1.

        lower_bck_diad1 (Tuple[float, float]):
            Lower boundary for background fitting in cm-1. Default (1180, 1220)

        upper_bck_diad1 (Tuple[float, float]):
            Upper boundary for background fitting in cm-1 Default (1300, 1350).

        fit_gauss (Optional[bool]): Default True
            Fit a Gaussian peak if True. Helpful for very elevated spectra

        gauss_amp (Optional[float]):  Default = 1000
            Gaussian peak amplitude if `fit_gauss` is True. Default 1000.
            We find 2X HB intensity is a good guess on many instruments

        diad_sigma (float): Default = 0.2
            Sigma of diad peak.

        diad_sigma_min_allowance (float): Default = 0.2
            Tolerance on entered sigma. Means minimum allowed sigma is 0.2*sigma

        diad_sigma_max_allowance (float): Default = 5
            Tolerance on entered sigma. Means minimum allowed sigma is 5*sigma


        diad_prom (float): Default = 100
            Peak amplitude for of the diad. Suggest users obtain from the estimated peak parameter dataframe

        HB_prom (float):  Default = 20
            Peak amplitude for HB. Suggest users obtain from the estimated peak parameter dataframe

        HB_sigma_min_allowance (float): Default = 0.05
            Means HB sigma cant be less than 0.05* sigma of first fitting peak (diad).

        HB_sigma_max_allowance (float): Default = 3:
            Means HB sigma cant exceed 3* sigma of first fitting peak (diad)

        HB_amp_min_allowance (float): Default =0.01
            Means HB amplitude cant be less than 0.01*amplitude of first fitted peak (diad).

        HB_amp_max_allowance (float): Default = 1
            Means HB amplitude cant be more than 1*amplitude of first fitted peak (diad).

        x_range_baseline (float): Default 75.
            Means that x axis of baseline shows diad position +- 75 x units.

        y_range_baseline (float): Y-axis range for baseline display. Default 100.
            Shows a y axis scale that is 100 y units above the minimum baseline value, and 100 units above the maximum value

        dpi (float): Default 200
            Figure resolution in dots per inch. Default 200.

        x_range_residual (float):  Default 20.
            Shows x values +-20 either side of the diad peak position in the residual plot.

        return_other_params (bool): Return non-fitted parameters if True. Default False.

    Methods:
        update_par(**kwargs):
            Updates configuration parameters. Raises AttributeError for unknown attributes.

    """
    # What model to use
    model_name: str = 'PseudoVoigtModel'
    fit_peaks:int = 2

    # Degree of polynomial to use
    N_poly_bck_diad1: float =1

    # Background/baseline positions
    lower_bck_diad1: Tuple[float, float]=(1180, 1220)
    upper_bck_diad1: Tuple[float, float]=(1300, 1350)

    # Do you need a gaussian? Set position here if so
    fit_gauss: Optional [bool] =False
    gauss_amp: Optional [float]=1000

    diad_sigma: float=0.2
    diad_sigma_min_allowance: float=0.2
    diad_sigma_max_allowance: float=5




    # Peak amplitude
    diad_prom: float = 100
    HB_prom: float = 20
    HB_sigma_min_allowance=0.05
    HB_sigma_max_allowance=3
    HB_amp_min_allowance=0.01
    HB_amp_max_allowance=1
    # How much to show on x anx y axis of figure showing background
    x_range_baseline: float=75
    y_range_baseline: float=100

    #Do you want to save the figure?


    dpi: float = 200
    x_range_residual: float=20

    # Do you want to return other parameters?
    return_other_params: bool =False


[docs]
    def update_par(self, **kwargs):
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                raise AttributeError(f"'diad1_fit_config' object has no attribute '{key}'")







[docs]
@dataclass
class diad2_fit_config:

    """
    Configuration class for fitting Diad 2

    This class stores configuration parameters for fitting Diad 2 and associated peaks.
    The class allows for easy customization of the fitting process.

    Attributes:
        model_name (str): Default 'PseudoVoigtModel'.
            Model name for peak fitting. Choose from
            'PseudoVoigtModel', 'VoigtModel', 'GaussianModel', 'LorentzianModel', etc.

        fit_peaks (int): Default 2
            Number of peaks to fit. 3 = HB + Diad + C13, 2 = Diad + HB, 1 = Diad.

        N_poly_bck_diad2 (float): Default = 1
            Degree of polynomial for background subtraction. Default 1.

        lower_bck_diad2 (Tuple[float, float]):
            Lower boundary for background fitting in cm-1. Default (1300, 1360)

        upper_bck_diad2 (Tuple[float, float]):
            Upper boundary for background fitting in cm-1 Default (1440, 1470)

        fit_gauss (Optional[bool]): Default True
            Fit a Gaussian peak if True. Helpful for very elevated spectra

        gauss_amp (Optional[float]):  Default = 1000
            Gaussian peak amplitude if `fit_gauss` is True. Default 1000.
            We find 2X HB intensity is a good guess on many instruments

        diad_sigma (float): Default = 0.2
            Sigma of diad peak.

        diad_sigma_min_allowance (float): Default = 0.2
            Tolerance on entered sigma. Means minimum allowed sigma is 0.2*sigma

        diad_sigma_max_allowance (float): Default = 5
            Tolerance on entered sigma. Means minimum allowed sigma is 5*sigma


        diad_prom (float): Default = 100
            Peak amplitude for of the diad. Suggest users obtain from the estimated peak parameter dataframe

        HB_prom (float):  Default = 20
            Peak amplitude for HB. Suggest users obtain from the estimated peak parameter dataframe

        C13_prom (float):  Default = 10
            Peak amplitude for C13. Suggest users obtain from the estimated peak parameter dataframe

        HB_sigma_min_allowance (float): Default = 0.05
            Means HB sigma cant be less than 0.05* sigma of first fitting peak (diad).

        HB_sigma_max_allowance (float): Default = 3:
            Means HB sigma cant exceed 3* sigma of first fitting peak (diad)

        HB_amp_min_allowance (float): Default =0.01
            Means HB amplitude cant be less than 0.01*amplitude of first fitted peak (diad).

        HB_amp_max_allowance (float): Default = 1
            Means HB amplitude cant be more than 1*amplitude of first fitted peak (diad).

        x_range_baseline (float): Default 75.
            Means that x axis of baseline shows diad position +- 75 x units.

        y_range_baseline (float): Y-axis range for baseline display. Default 100.
            Shows a y axis scale that is 100 y units above the minimum baseline value, and 100 units above the maximum value

        dpi (float): Default 200
            Figure resolution in dots per inch. Default 200.

        x_range_residual (float):  Default 20.
            Shows x values +-20 either side of the diad peak position in the residual plot.

        return_other_params (bool): Return non-fitted parameters if True. Default False.

    Methods:
        update_par(**kwargs):
            Updates configuration parameters. Raises AttributeError for unknown attributes.

    """

    # Do you need a gaussian? Set position here if so

    # What model to use
    model_name: str = 'PseudoVoigtModel'
    fit_peaks:int = 3

    # Degree of polynomial to use
    N_poly_bck_diad2: float =1
    # Threshold intesnity
    threshold_intensity=50

    # Background/baseline positions
    lower_bck_diad2: Tuple[float, float]=(1300, 1360)
    upper_bck_diad2: Tuple[float, float]=(1440, 1470)


    fit_gauss: Optional [bool] =False
    gauss_amp: Optional [float]=1000

    diad_sigma: float=0.2
    diad_sigma_min_allowance: float=0.2
    diad_sigma_max_allowance: float=5


    # Peak amplitude
    diad_prom: float = 100
    HB_prom: float = 20
    C13_prom: float=10
    HB_sigma_min_allowance=0.05
    HB_sigma_max_allowance=3
    HB_amp_min_allowance=0.01
    HB_amp_max_allowance=1
    # How much to show on x anx y axis of figure showing background with all peaks imposed
    x_range_baseline: float=75
    y_range_baseline: float=100

    #Do you want to save the figure?
    plot_figure: bool = True
    dpi: float = 200
    x_range_residual: float=20

    # Do you want to return other parameters?
    return_other_params: bool =False




[docs]
    def update_par(self, **kwargs):
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)
            else:
                raise AttributeError(f"'diad2_fit_config' object has no attribute '{key}'")



## Testing generic model



[docs]
def fit_gaussian_voigt_generic_diad(config1, *, diad1=False, diad2=False, path=None, filename=None, xdat=None, ydat=None, span=None,  plot_figure=True, dpi=200, Diad_pos=None, HB_pos=None, C13_pos=None, fit_peaks=None, block_print=True, py_baseline=None, minimise='weighted_least_squares'):


    """ This function fits a voigt/Pseudovoigt curves to background subtracted data for your diads
    +- HBs+-C13 peaks. It can also fit a second Gaussian background iteratively with these peaks


    Parameters
    -----------
    config1: fit configuration file, from diad2_fit_config or diad1_fit_config data classes.
        Detailed descriptions of things in this file are found in docs for fit_diad_2_w_bck, and fit_diad_1_w_bck

    diad1: bool
        True if fitting Diad1, False if Diad2

    diad2: bool
        True if fitting Diad2, False if Diad1

    path: str
        Path to spectra folder. Used to make a folder inside this folder to store peak fit images in

    filename: str
        filename of specific diad being fitted

    xdat: np.array
        x coordinates of the spectra

    ydat: np.array
        y coordinates of the spectra

    span: list (e.g. [1100, 1300])
        Comes from remove_diad_baseline function. Sets min and max x coordinate of new linspace
        for plotting the overall peak fit over the data

    plot_figure: bool
        if True, plots figure from this function specifically. Even if False, if called through
        fit_diad_2_w_bck or fit_diad_1_w_bck, get a figure from those functions with relevant data on

    dpi: int
        dpi of saved figure

    Diad_pos, HB_pos, C13_pos: float
        Initial guess of Diad, HB, C13 pos

    fit_peaks: int
        Number of peaks to fit. 1= just diad, 2 = diad + HB, 3 = diad+ HB +C13

    block_print: bool
        If true, prints a few status updates as it goes to help debug

    py_baseline:
        the baseline from the diadbaseline function

    minimise='weighted_least_squares' or 'least squares'
        uses a weighted least squares cost function. Prior to 1.0.19, least squares was default.

    Returns
    -------------------
    result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim, config1.fit_gauss, residual_diad_coords, ydat_inrange,  xdat_inrange

    result: lmfit.model.ModelResult (model form lmfit)
    df_out: dataframe of peak fit parameters
    y_best_fit: np.array of best peak fit y parameters
    x_lin: np.array of x coordinates corresponding to y_best_fit
    components: y coordinates of the different components of the peak fit.
    xdat: input xdat
    ydat: input ydat
    ax1_xlim, ax2_xlim: xlim for plot in final figure in fit_w_bck for different fit components
    residual_diad_coordinates: np.array of y coordinate of residual plot
    ydat_inrange: np.array of data within 'span'
    xdat_inrange:np.array of data within 'span'



    """



    # Calculate the amplitude from the sigma and the prominence
    calc_diad_amplitude=((config1.diad_sigma)*(config1.diad_prom))/0.3939
    calc_HB_amplitude=((config1.diad_sigma)*(config1.HB_prom))/0.3939

    # If ask for Gaussian, but fit peaks is 1, remove gaussian
    if fit_peaks==1 and config1.fit_gauss is True:
        config1.fit_gauss=False


    if diad2 is True and fit_peaks==3:
        calc_C13_amplitude=(0.5*(config1.diad_sigma)*(config1.C13_prom))/0.3939
    # Gets overridden if you have triggered any of the warnings
    refit=False
    refit_param='Flagged Warnings:'
    spec_res=np.abs(xdat[1]-xdat[0])






    initial_guess=Diad_pos
    HB_initial_guess=HB_pos
    C13_initial_guess=C13_pos


    model_ini = globals()[config1.model_name]()




    # Create initial peak params
    # Set peak position to 2 spectral res units either side of the initial guess made above
    params_ini = model_ini.make_params(center=initial_guess, max=initial_guess+spec_res*5, min=initial_guess-spec_res*5)
    # Set the amplitude to within X0.1 or 10X of the guessed amplitude
    params_ini['amplitude'].set(calc_diad_amplitude, min=calc_diad_amplitude/5,
                                 max=calc_diad_amplitude*5)
    # Set sigma to be the entered value, then
    params_ini['sigma'].set(config1.diad_sigma, min=config1.diad_sigma*config1.diad_sigma_min_allowance,
                                max=config1.diad_sigma*config1.diad_sigma_max_allowance)

    # Now lets tweak an initial model with just 1 peak, this really helps with the fit
    init_ini = model_ini.eval(params_ini, x=xdat)
    result_ini  = model_ini.fit(ydat, params_ini, x=xdat)
    comps_ini  = result_ini.eval_components()
    Center_ini=result_ini.params.get('center')
    Amplitude_ini=result_ini.params.get('amplitude')
    sigma_ini=result_ini.params.get('sigma')
    fwhm_ini=result_ini.params.get('fwhm')



    # For relatively weak peaks, you wont want a gaussian background
    if config1.fit_gauss is False:

        # If there is 1 peak, e.g. if have a Nan for hotband
        if fit_peaks==1:
            model_F = globals()[config1.model_name](prefix='lz1_')
            pars1 = model_F.make_params()
            pars1['lz1_'+ 'amplitude'].set(calc_diad_amplitude, min=0, max=calc_diad_amplitude*10)
            pars1['lz1_'+ 'center'].set(Center_ini, min=Center_ini-5*spec_res, max=Center_ini+5*spec_res)
            pars1['lz1_'+ 'sigma'].set(config1.diad_sigma, min=config1.diad_sigma*config1.diad_sigma_min_allowance,
                         max=config1.diad_sigma*config1.diad_sigma_max_allowance)
            params=pars1


        # If there is more than one peak
        else:
            # Previous versions had a constant for PV and P4 but not Voigt. Now removed for all
            model1 = globals()[config1.model_name](prefix='lz1_')
            pars1 = model1.make_params()
            pars1['lz1_'+ 'amplitude'].set(calc_diad_amplitude, min=0, max=calc_diad_amplitude*10)

            pars1['lz1_'+ 'center'].set(Center_ini, min=Center_ini-3*spec_res, max=Center_ini+3*spec_res)

            pars1['lz1_'+ 'sigma'].set(config1.diad_sigma, min=config1.diad_sigma*config1.diad_sigma_min_allowance,
                        max=config1.diad_sigma*config1.diad_sigma_max_allowance)




            if fit_peaks>1:
                model_F=model1
                params=pars1

            if fit_peaks==2:

                # Second wee peak
                prefix='lz2_'

                peak = globals()[config1.model_name](prefix='lz2_')


                pars = peak.make_params()

                pars[prefix + 'center'].set(HB_initial_guess,
                    min=HB_initial_guess-2*spec_res, max=HB_initial_guess+2)

                pars[prefix + 'amplitude'].set(calc_HB_amplitude, min=Amplitude_ini*config1.HB_amp_min_allowance, max=Amplitude_ini*config1.HB_amp_max_allowance)
                pars[prefix+ 'sigma'].set(sigma_ini, min=sigma_ini*config1.HB_sigma_min_allowance, max=sigma_ini*config1.HB_sigma_max_allowance)


                model_F=model1+peak

                # updating pars 1 with values in pars
                pars1.update(pars)
                params=pars1


                # Attempt at stabilizing peak fit
                result = model_F.fit(ydat, pars1, x=xdat)
                result.params['lz2_center'].vary = False
                result.params['lz2_amplitude'].vary = False
                result.params['lz2_sigma'].vary = False
                final_result = model_F.fit(ydat, result.params, x=xdat)
                params.update(final_result.params)
                params.update(final_result.params)
                result=final_result







            if fit_peaks==3:
                if block_print is False:
                    print('Trying to iteratively fit 3 peaks')

                peak_pos_left=np.array([HB_initial_guess, C13_initial_guess])

                for i, cen in enumerate(peak_pos_left):

                    if i==0: # This is the hotband
                        peak, pars = add_peak(prefix='lz%d_' % (i+2), center=cen,
                        min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini,
                        min_sigma=sigma_ini*config1.HB_sigma_min_allowance,
                        max_sigma=sigma_ini*config1.HB_sigma_max_allowance,
                        amplitude=calc_HB_amplitude,
                        min_amplitude=Amplitude_ini*config1.HB_amp_min_allowance,
                        max_amplitude=Amplitude_ini*config1.HB_amp_max_allowance,
                        model_name=config1.model_name)
                        model = peak+model1
                        params.update(pars)
                    if i==1: # This is c13
                        peak, pars = add_peak(prefix='lz%d_' % (i+2), center=cen,
                        min_cent=cen-3*spec_res, max_cent=cen+3*spec_res,
                        sigma=sigma_ini/5,
                        min_sigma=sigma_ini/20,
                        max_sigma=sigma_ini/2,
                        amplitude=calc_C13_amplitude,
                        min_amplitude=calc_C13_amplitude*(0.5*config1.HB_amp_min_allowance),
                        max_amplitude=calc_C13_amplitude*2*(config1.HB_amp_max_allowance),
                        model_name=config1.model_name)
                        model = peak+model
                        params.update(pars)




                model_F=model



                # Attempt at stabilizing peak fit
                result = model_F.fit(ydat, pars1, x=xdat)
                result.params['lz2_center'].vary = False
                result.params['lz2_amplitude'].vary = False
                result.params['lz2_sigma'].vary = False
                result.params['lz3_center'].vary = False
                result.params['lz3_amplitude'].vary = False
                result.params['lz3_sigma'].vary = False
                final_result = model_F.fit(ydat, result.params, x=xdat)
                params.update(final_result.params)
                params.update(final_result.params)
                result=final_result




    # Same, but also with a Gaussian Background
    if config1.fit_gauss is not False:


        # making the gaussian model
        model1 = GaussianModel(prefix='bkg_')
        params = model1.make_params()
        params['bkg_'+'amplitude'].set(config1.gauss_amp, min=config1.gauss_amp/100, max=config1.gauss_amp*10)
        params['bkg_'+'sigma'].set(sigma_ini*15, min=sigma_ini*10, max=sigma_ini*25)
        params['bkg_'+'center'].set(initial_guess, min=initial_guess-7, max=initial_guess+7)



        if fit_peaks==2:
            peak_pos_voigt=np.array([initial_guess, HB_initial_guess])

            for i, cen in enumerate(peak_pos_voigt):
                if i==0: # This is the Diad
                    peak, pars = add_peak(prefix='lz%d_' % (i+1),
                center=Center_ini, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res,
                sigma=config1.diad_sigma,
                max_sigma=config1.diad_sigma*config1.diad_sigma_max_allowance,
                min_sigma=config1.diad_sigma*config1.diad_sigma_min_allowance,
                amplitude=calc_diad_amplitude, min_amplitude=0, max_amplitude=10*calc_diad_amplitude,
                model_name=config1.model_name)
                model = peak+model1
                params.update(pars)

                if i==1: # This is the hotband
                    peak, pars = add_peak(prefix='lz%d_' % (i+1), center=cen,
                        min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini,
                        min_sigma=sigma_ini*config1.HB_sigma_min_allowance,
                        max_sigma=sigma_ini*config1.HB_sigma_max_allowance,
                        amplitude=calc_HB_amplitude,
                        min_amplitude=Amplitude_ini*config1.HB_amp_min_allowance,
                        max_amplitude=Amplitude_ini*config1.HB_amp_max_allowance,
                        model_name=config1.model_name)
                    model2 = peak+model
                    params.update(pars)


            # Attempt at stabilizing peak fit
# updating pars 1 with values in pars

            result = model2.fit(ydat, params, x=xdat)


            # Set the parameters of 'lz1' to be fixed
            result.params['lz2_center'].vary = False
            result.params['lz2_amplitude'].vary = False
            result.params['lz2_sigma'].vary = False
            result.params['bkg_center'].vary = False
            result.params['bkg_amplitude'].vary = False
            result.params['bkg_sigma'].vary = False

            # Perform fitting using the entire model (model2) with the stabilized 'lz1' parameters
            final_result = model2.fit(ydat, result.params, x=xdat)
            center_error = final_result.params['lz1_center'].stderr
            params.update(final_result.params)


            result=final_result
            model_F=model2


        if fit_peaks==3:
            peak_pos_voigt=np.array([initial_guess, HB_initial_guess, C13_initial_guess])

            for i, cen in enumerate(peak_pos_voigt):

                if i==0: # This is the Diad
                    peak, pars = add_peak(prefix='lz%d_' % (i+1),
                center=Center_ini, min_cent=cen-3*spec_res, max_cent=cen+3*spec_res,
                sigma=config1.diad_sigma,
                max_sigma=config1.diad_sigma*config1.diad_sigma_max_allowance,
                min_sigma=config1.diad_sigma*config1.diad_sigma_min_allowance,
                amplitude=calc_diad_amplitude, min_amplitude=0, max_amplitude=10*calc_diad_amplitude,
                model_name=config1.model_name)
                model = peak+model1
                params.update(pars)

                if i==1: # This is the hotband

                    peak, pars = add_peak(prefix='lz%d_' % (i+1), center=cen,
                        min_cent=cen-3*spec_res, max_cent=cen+3*spec_res, sigma=sigma_ini,
                        min_sigma=sigma_ini*config1.HB_sigma_min_allowance,
                        max_sigma=sigma_ini*config1.HB_sigma_max_allowance,
                        amplitude=calc_HB_amplitude,
                        min_amplitude=Amplitude_ini*config1.HB_amp_min_allowance,
                        max_amplitude=Amplitude_ini*config1.HB_amp_max_allowance,
                        model_name=config1.model_name)
                    model2 = peak+model
                    params.update(pars)

                if i==2: # This is c13
                    peak, pars = add_peak(prefix='lz%d_' % (i+1), center=cen,
                        min_cent=cen-3*spec_res, max_cent=cen+3*spec_res,
                        sigma=sigma_ini/5,
                        min_sigma=sigma_ini/20,
                        max_sigma=sigma_ini/2,
                        amplitude=calc_C13_amplitude,
                        min_amplitude=calc_C13_amplitude*(0.5*config1.HB_amp_min_allowance),
                        max_amplitude=calc_C13_amplitude*2*(config1.HB_amp_max_allowance),
                        model_name=config1.model_name)
                    model3 = peak+model2
                    params.update(pars)

            result = model3.fit(ydat, params, x=xdat)


            # Set the parameters of 'lz1' to be fixed
            result.params['lz2_center'].vary = False
            result.params['lz2_amplitude'].vary = False
            result.params['lz2_sigma'].vary = False
            result.params['lz3_center'].vary = False
            result.params['lz3_amplitude'].vary = False
            result.params['lz3_sigma'].vary = False
            result.params['bkg_center'].vary = False
            result.params['bkg_amplitude'].vary = False
            result.params['bkg_sigma'].vary = False

            # Perform fitting using the entire model (model2) with the stabilized 'lz1' parameters
            final_result = model3.fit(ydat, result.params, x=xdat)
            center_error = final_result.params['lz1_center'].stderr

            result=final_result
            model_F=model3

            params.update(final_result.params)









    if minimise=='least_squares':
        print('using least squares')
    # all points given equal weight - used by default
    # until Diadfit 1.0.18
        init = model_F.eval(params, x=xdat)
        result = model_F.fit(ydat, params, x=xdat)
        comps = result.eval_components()

    elif minimise=='weighted_least_squares':
        print('using weighted least squares')
        # Operate on total acounts, so add back in baseline
        total_counts_obs = ydat + py_baseline

        # Calculate weights (1/sigma)
        weights = 1.0 / np.sqrt(np.maximum(total_counts_obs, 1.0))

        # 4. Final fit
        # We use scale_covar=True to let lmfit scale the errors based on the
        # fit quality (the residuals), recomended if we dont know the absolute size of the errors/weights and we dont because there could easily be gain on the detector, other noise sources etc
        result = model_F.fit(ydat, params, x=xdat,
                            weights=weights,
                            scale_covar=True)


    else:
        raise ValueError(
            f"Invalid minimise option: '{minimise}'. "
            "Please specify minimise='least_squares' or "
            "minimise='weighted_least_squares'."
        )

    # print(f"Fityk-style Weighted RedChi: {result.redchi:.4f}")
    # print(f"Error: {result.params['lz1_center'].stderr:.5f}")
    #
    # print('Weighting accounts for 5 averaged acquisitions and fitted baseline.')
    # print(f"Reduced Chi-Square 2: {result.redchi}")

    # 6. Evaluate components for plotting
    comps = result.eval_components()
    reduced_chi_squared = np.sqrt(result.redchi)

    # Other things
    #print(result.best_values)
    # Get first peak center
    Peak1_Cent=result.best_values.get('lz1_center')
    Peak1_Int=result.best_values.get('lz1_amplitude')
    Peak1_Sigma=result.best_values.get('lz1_sigma')
    Peak1_gamma=result.best_values.get('lz1_gamma')
    Peak1_Prop_Lor=result.best_values.get('lz1_fraction')
    Peak1_fwhm=result.params['lz1_fwhm'].value


    error_pk2 = result.params['lz1_center'].stderr



    x_lin=np.linspace(span[0], span[1], 2000)
    y_best_fit=result.eval(x=x_lin)
    components=result.eval_components(x=x_lin)




    if config1.fit_gauss is not False:
        Gauss_cent=result.best_values.get('bkg_center')
        Gauss_amp=result.best_values.get('bkg_amplitude')
        Gauss_sigma=result.best_values.get('bkg_sigma')

        max_Gauss=np.max(components.get('bkg_'))
        max_voigt=np.max(components.get('lz1_'))
        if max_Gauss>max_voigt/5:
            refit_param=str(refit_param)+ ' G_higher_than_diad'
        # if Gauss_sigma>=(config1.gauss_sigma*10-0.1):
        #     refit=True
        #     refit_param=str(refit_param)+' G_input_TooLowSigma'
        # if Gauss_sigma<=(config1.gauss_sigma/10+0.1):
        #     refit=True
        #     refit_param=str(refit_param)+ ' G_input_TooHighSigma'
        #
        # if Gauss_amp>=(config1.gauss_amp*10-0.1):
        #     refit=True
        #     refit_param=str(refit_param)+' G_input_TooLowAmp'
        # if Gauss_amp<=(config1.gauss_sigma+0.1):
        #     refit=True
        #     refit_param=str(refit_param)+' G_Amp_too_high'
        if Gauss_cent<=(config1.fit_gauss-30+0.5):
            refit=True
            refit_param=str(refit_param)+' G_InputTooLowCent'
        if Gauss_cent>=(initial_guess+30-0.5):
            refit=True
            refit_param=str(refit_param)+' G_InputTooHighCent'





    # print('fwhm gauss')
    # print(result.best_values)




    #

    # Work out what peak is what
    if diad2 is True:

        if fit_peaks==1:

            Peak2_Cent=None
            Peak3_Cent=None
            ax1_xlim=[Center_ini-15, Center_ini+15]
            ax2_xlim=[Center_ini-15, Center_ini+15]

        if fit_peaks==2:

            Peak2_Cent=result.best_values.get('lz2_center')
            Peak2_Int=result.best_values.get('lz2_amplitude')
            Peak2_Sigma=result.best_values.get('lz2_sigma')
            Peak3_Cent=None
            ax1_xlim=[Center_ini-15, Center_ini+30]
            ax2_xlim=[Center_ini-15, Center_ini+30]

        if fit_peaks==3:

            Peak2_Cent=result.best_values.get('lz2_center')
            Peak2_Int=result.best_values.get('lz2_amplitude')
            Peak2_Sigma=result.best_values.get('lz2_sigma')
            Peak3_Cent=result.best_values.get('lz3_center')
            Peak3_Int=result.best_values.get('lz3_amplitude')
            Peak3_Sigma=result.best_values.get('lz3_sigma')

            ax1_xlim=[Center_ini-30, Center_ini+30]
            ax2_xlim=[Center_ini-30, Center_ini+30]

        if Peak2_Cent is None:
            df_out=pd.DataFrame(data={'Diad2_Voigt_Cent': Peak1_Cent,
                                    'Diad2_cent_err': error_pk2,
                                    'Diad2_Voigt_Area': Peak1_Int,
                                'Diad2_Voigt_Sigma': Peak1_Sigma,
                                'Diad2_Voigt_Gamma': Peak1_gamma,


            }, index=[0])

        if Peak2_Cent is not None:


            if Peak3_Cent is None:
                Peaks=np.array([Peak1_Cent, Peak2_Cent])
                # Diad is lower peak
                Diad2_Cent=np.min(Peaks)
                # Hot band is upper peak'
                HB2_Cent=np.max(Peaks)
                # Allocate areas
                if Diad2_Cent==Peak2_Cent:
                    Diad2_Int=Peak2_Int
                    HB2_Int=Peak1_Int
                    HB2_Sigma=Peak1_Sigma
                if Diad2_Cent==Peak1_Cent:
                    Diad2_Int=Peak1_Int
                    HB2_Int=Peak2_Int
                    HB2_Sigma=Peak2_Sigma




            if Peak3_Cent is not None:
                Peaks=np.array([Peak1_Cent, Peak2_Cent, Peak3_Cent])

                # C13 is lower peak
                C13_Cent=np.min(Peaks)
                # Diad is medium peak
                Diad2_Cent=np.median(Peaks)
                # Hot band is upper peak'
                HB2_Cent=np.max(Peaks)

                if Diad2_Cent==Peak2_Cent:
                    Diad2_Int=Peak2_Int
                if Diad2_Cent==Peak1_Cent:
                    Diad2_Int=Peak1_Int
                if Diad2_Cent==Peak3_Cent:
                    Diad2_Int=Peak3_Int
                # Same for hotband
                if HB2_Cent==Peak2_Cent:
                    HB2_Int=Peak2_Int
                    HB2_Sigma=Peak2_Sigma
                if HB2_Cent==Peak1_Cent:
                    HB2_Int=Peak1_Int
                    HB2_Sigma=Peak1_Sigma
                if HB2_Cent==Peak3_Cent:
                    HB2_Int=Peak3_Int
                    HB2_Sigma=Peak2_Sigma


                # Same for C13
                if C13_Cent==Peak2_Cent:
                    C13_Int=Peak2_Int
                    C13_Sigma=Peak2_Sigma
                if C13_Cent==Peak1_Cent:
                    C13_Int=Peak1_Int
                    C13_Sigma=Peak1_Sigma
                if C13_Cent==Peak3_Cent:
                    C13_Int=Peak3_Int
                    C13_Sigma=Peak3_Sigma

            df_out=pd.DataFrame(data={'Diad2_Voigt_Cent': Diad2_Cent,
            'Diad2_cent_err': error_pk2,
                                    'Diad2_Voigt_Area': Diad2_Int,
                                    'Diad2_Voigt_Sigma': Peak1_Sigma,
                                    'Diad2_Voigt_Gamma': Peak1_gamma,
            }, index=[0])

            df_out['HB2_Cent']=HB2_Cent
            df_out['HB2_Area']=HB2_Int
            df_out['HB2_Sigma']=HB2_Sigma
            if  Peak3_Cent is not None:
                df_out['C13_Cent']=C13_Cent
                df_out['C13_Area']=C13_Int
                df_out['C13_Sigma']=C13_Sigma

        result_diad2_origx_all=result.eval(x=xdat)
        # Trim to be in range
        #print(result_diad2_origx_all)
        result_diad2_origx=result_diad2_origx_all[(xdat>span[0]) & (xdat<span[1])]
        ydat_inrange=ydat[(xdat>span[0]) & (xdat<span[1])]
        xdat_inrange=xdat[(xdat>span[0]) & (xdat<span[1])]
        residual_diad_coords=ydat_inrange-result_diad2_origx


        x_cent_lin=np.linspace(df_out['Diad2_Voigt_Cent']-1, df_out['Diad2_Voigt_Cent']+1, 20000)
        y_cent_best_fit=result.eval(x=x_cent_lin)
        diad_height = np.max(y_cent_best_fit)
        df_out['Diad2_Combofit_Height']= diad_height
        df_out.insert(0, 'Diad2_Combofit_Cent', np.nanmean(x_cent_lin[y_cent_best_fit==diad_height]))


        residual_diad2=np.sum(((ydat_inrange-result_diad2_origx)**2)**0.5)/(len(ydat_inrange))
        df_out['Diad2_Residual']=residual_diad2
        df_out['Diad2_Prop_Lor']= Peak1_Prop_Lor
        df_out['Diad2_fwhm']=Peak1_fwhm

        if config1.fit_gauss is not False:
            df_out['Diad2_Gauss_Cent']=Gauss_cent
            df_out['Diad2_Gauss_Area']=Gauss_amp
            df_out['Diad2_Gauss_Sigma']=Gauss_sigma


        #Checking whether you need any flags.
        if error_pk2 is None:
            refit=True
            refit_param=str(refit_param)+' No Error'


        if Peak1_Int>=(calc_diad_amplitude*10-0.1):
            refit=True
            refit_param=str(refit_param)+' V_LowAmp'
        if Peak1_Sigma>=(calc_diad_amplitude*10-0.1):
            refit=True
            refit_param=str(refit_param)+' V_HighAmp'
        if Peak1_Sigma>=(config1.diad_sigma*config1.diad_sigma_max_allowance-0.001):
            refit=True
            refit_param=str(refit_param)+' V_Sigma_too_Low'
        if  diad_height>(3*config1.diad_prom):
            refit_param=str(refit_param)+' V_Sigma_too_Low'
        if  diad_height<(0.5*config1.diad_prom):
            refit_param=str(refit_param)+' V_Sigma_too_High'
        if Peak1_Sigma<=(config1.diad_sigma*config1.diad_sigma_min_allowance+0.001):
            refit=True
            refit_param=str(refit_param)+' V_Sigma_too_High'
        if diad_height<config1.threshold_intensity:
            refit_param=str(refit_param)+' Below threshold intensity'

    #if findme


        if plot_figure is True:

            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4))
            ax1.plot(x_lin, y_best_fit, '-g', linewidth=2, label='best fit')
            ax1.plot(xdat, ydat,  '.k', label='data')
            ax1.legend()
            ax1.set_xlim(ax1_xlim)
            ax2.set_xlim(ax2_xlim)






            if config1.fit_gauss is not False:
                ax2.plot(x_lin, components.get('bkg_'), '.c',linewidth=2,  label='Gaussian bck')
            ax2.plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Peak1')
            ax2.plot(xdat, ydat, '.k')


        #ax2.plot(xdat, result.best_fit, '-g', label='best fit')

            fitspan=max(y_best_fit)-min(y_best_fit)
            ax2.set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5])


            ax1.set_ylabel('Intensity')
            ax1.set_xlabel('Wavenumber')
            ax2.set_ylabel('Intensity')
            ax2.set_xlabel('Wavenumber')


            if fit_peaks>1:
                ax2.plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='Peak2')

            if fit_peaks>2:
                ax2.plot(x_lin, components.get('lz3_'), '-m', linewidth=2, label='Peak3')


            ax2.legend()

            path3=path+'/'+'diad_fit_images'
            if os.path.exists(path3):
                out='path exists'
            else:
                os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False)


            file=filename.rsplit('.txt', 1)[0]
            fig.savefig(path3+'/'+'Diad2_Fit_{}.png'.format(file), dpi=dpi)




        best_fit=result.best_fit

        df_out['Diad2_refit']=refit_param

    if diad1 is True:



        if fit_peaks==1:
            Peak2_Cent=None
            Peak3_Cent=None
            ax1_xlim=[Center_ini-15, Center_ini+15]
            ax2_xlim=[Center_ini-15, Center_ini+15]
        if fit_peaks==2:
            Peak2_Cent=result.best_values.get('lz2_center')
            Peak2_Int=result.best_values.get('lz2_amplitude')
            Peak2_Sigma=result.best_values.get('lz2_sigma')
            # # Checking parameters for hot bands
            # if Peak2_Sigma>(sigma_ini*5-0.1):
            #     refit_param=str(refit_param)+' HB1_HighSigma'
            # if Peak2_Sigma<=(sigma_ini/20+0.1):
            #     refit_param=str(refit_param)+' HB1_LowSigma'
            #
            # if Peak2_Int>(Amplitude_ini/5-0.001):
            #     refit_param=str(refit_param)+' HB1_HighAmp'
            # if Peak2_Int<=(Amplitude_ini/100+0.001):
            #     refit_param=str(refit_param)+' HB1_LowAmp'


            Peak3_Cent=None
            ax1_xlim=[Center_ini-30, Center_ini+15]
            ax2_xlim=[Center_ini-30, Center_ini+15]



        if Peak2_Cent is None:
            df_out=pd.DataFrame(data={'Diad1_Voigt_Cent': Peak1_Cent,
            'Diad1_cent_err': error_pk2,
                                    'Diad1_Voigt_Area': Peak1_Int,
                                'Diad1_Voigt_Sigma': Peak1_Sigma,
                                'Diad1_Voigt_Gamma': Peak1_gamma,


            }, index=[0])
        if Peak2_Cent is not None:





            df_out=pd.DataFrame(data={'Diad1_Voigt_Cent': Peak1_Cent,
            'Diad1_cent_err': error_pk2,
                                    'Diad1_Voigt_Area': Peak1_Int,
                                    'Diad1_Voigt_Sigma': Peak1_Sigma,
                                    'Diad1_Voigt_Gamma': Peak1_gamma,
            }, index=[0])

            df_out['HB1_Cent']=Peak2_Cent
            df_out['HB1_Area']=Peak2_Int
            df_out['HB1_Sigma']=Peak2_Sigma


        result_diad1_origx_all=result.eval(x=xdat)
        # Trim to be in range
        #print(result_diad2_origx_all)
        result_diad1_origx=result_diad1_origx_all[(xdat>span[0]) & (xdat<span[1])]
        ydat_inrange=ydat[(xdat>span[0]) & (xdat<span[1])]
        xdat_inrange=xdat[(xdat>span[0]) & (xdat<span[1])]
        residual_diad_coords=ydat_inrange-result_diad1_origx


        x_cent_lin=np.linspace(df_out['Diad1_Voigt_Cent']-1, df_out['Diad1_Voigt_Cent']+1, 20000)
        y_cent_best_fit=result.eval(x=x_cent_lin)
        diad_height = np.max(y_cent_best_fit)
        df_out['Diad1_Combofit_Height']= diad_height
        df_out.insert(0, 'Diad1_Combofit_Cent', np.nanmean(x_cent_lin[y_cent_best_fit==diad_height]))


        residual_diad1=np.sum(((ydat_inrange-result_diad1_origx)**2)**0.5)/(len(ydat_inrange))
        df_out['Diad1_Residual']=residual_diad1
        df_out['Diad1_Prop_Lor']= Peak1_Prop_Lor
        df_out['Diad1_fwhm']=Peak1_fwhm

        if config1.fit_gauss is not False:
            df_out['Diad1_Gauss_Cent']=Gauss_cent
            df_out['Diad1_Gauss_Area']=Gauss_amp
            df_out['Diad1_Gauss_Sigma']=Gauss_sigma



        if plot_figure is True:

            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12,4))
            ax1.plot(x_lin, y_best_fit, '-g', linewidth=2, label='best fit')
            ax1.plot(xdat, ydat,  '.k', label='data')
            ax1.legend()
            ax1.set_xlim(ax1_xlim)
            ax2.set_xlim(ax2_xlim)






            if config1.fit_gauss is not False:
                ax2.plot(x_lin, components.get('bkg_'), '.c',linewidth=2,  label='Gaussian bck')
            ax2.plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Peak1')
            ax2.plot(xdat, ydat, '.k')


        #ax2.plot(xdat, result.best_fit, '-g', label='best fit')

            fitspan=max(y_best_fit)-min(y_best_fit)
            ax2.set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5])


            ax1.set_ylabel('Intensity')
            ax1.set_xlabel('Wavenumber')
            ax2.set_ylabel('Intensity')
            ax2.set_xlabel('Wavenumber')


            if fit_peaks>1:
                ax2.plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='Peak2')

            if fit_peaks>2:
                ax2.plot(x_lin, components.get('lz3_'), '-m', linewidth=2, label='Peak3')
            # if len(peak_pos_voigt)>1:
            #     lowerpeak=np.min([Peak1_Cent, Peak2_Cent])
            #     upperpeak=np.max([Peak1_Cent, Peak2_Cent])
            #     ax1.set_xlim([lowerpeak-15, upperpeak+15])
            # if len(peak_pos_voigt)>1:
            #     ax2.set_xlim([lowerpeak-20, upperpeak+20])

            ax2.legend()

            path3=path+'/'+'diad_fit_images'
            if os.path.exists(path3):
                out='path exists'
            else:
                os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False)


            file=filename.rsplit('.txt', 1)[0]
            fig.savefig(path3+'/'+'Diad1_Fit_{}.png'.format(file), dpi=dpi)




        best_fit=result.best_fit

        df_out['Diad1_refit']=refit_param

        # Final check - that Gaussian isnt anywhere near the height of the diad

    df_out['reduced_chi_squared']=reduced_chi_squared
    df_out = df_out.fillna(0)
    df_out = df_out.infer_objects(copy=False)

    return result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim, residual_diad_coords, ydat_inrange,  xdat_inrange





[docs]
def fit_diad_2_w_bck(*, config1: diad2_fit_config=diad2_fit_config(), config2: diad_id_config=diad_id_config(),
    path=None, filename=None, peak_pos_voigt=None,filetype=None, diad_array=None,
    plot_figure=True, close_figure=False, Diad_pos=None, HB_pos=None, C13_pos=None, minimise='weighted_least_squares'):
    """ This function fits the background (using the function remove_diad_baseline) and then
    fits the peaks using fit_gaussian_voigt_generic_diad()
    It then checks if any parameters are right at the permitted edge (meaning fitting didnt converge),
    and tries fitting if so. Then it makes a plot
    of the various parts of the fitting procedure, and returns a dataframe of the fit parameters.


    Parameters
    ------------------

    diad_id_config()
        Dataclass of parameters used to look for peaks. Uses any excluded ranges from this.

    diad2_fit_config()
        Dataclass of parameters used to find peaks. Use help on this dataclass to get options.

    path: str
        Folder user wishes to read data from

    filename: str
        Specific name of file being used.

    filetype: str
        Identifies type of file. choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    plot_figure: bool
        if True, saves figure

    close_fig: bool
        if True, displays figure in Notebook. If False, closes it (you can still find it saved)

    Diad_pos: float
        Estimate of diad peak position

    HB_pos: float
        Estimate of HB peak position

    C13_pos: float
        Estimate of C13 peak position

    minimise: str
        'weighted_least_squares' vs 'least_squares'


    Returns
    ------------
    if return_other_params is False (default)
        returns a dataframe of peak parameters
    if True, also returns:
            result: fit parameters
            y_best_fit: best fit to all curves in y
            x_lin: linspace of best fit coordinates in x.





    """
    if 'CRR' in filename:
        filetype='headless_txt'

    model=config1.model_name

    if model not in allowed_models:
        raise ValueError(f"Unsupported model: {model}. Supported models are: {', '.join(allowed_models)}")



    # Check number of peaks makes sense
    fit_peaks=config1.fit_peaks
    if diad_array is None:
        Diad_df=get_data(path=path, filename=filename, filetype=filetype)
        Diad=np.array(Diad_df)
    else:
        Diad=diad_array


    if fit_peaks==2:
        if np.isnan(HB_pos)==True or config1.HB_prom<0:
            fit_peaks=1
            fit_gauss=False



    if fit_peaks==3:
        # This tests if HB is Nan and C13 is Nan
        if np.isnan(HB_pos)==True and np.isnan(C13_pos)==True:
            fit_peaks=1
            fit_gauss=False
        # If only HB is Nan, lets instead allocate a HB positoin.
        elif np.isnan(HB_pos):
            HB_region=[Diad_pos+19, Diad_pos+23]
            filter=(Diad[:, 0]>=HB_region[0])& (Diad[:,0]<=HB_region[1])
            TrimDiad=Diad[filter]
            filter2=(Diad[:, 0]>=(HB_region[0]-5))& (Diad[:,0]<=(HB_region[1]+5))
            TrimDiad2=Diad[filter2]
            maxy=np.max(TrimDiad[:, 1])
            maxx=TrimDiad[TrimDiad[:, 1]==maxy]
            HB_pos=float(maxx[:, 0][0])
            miny=np.min(TrimDiad2[:, 1])
            config1.HB_prom=maxy-miny
            config1.gauss_amp=2*(maxy-miny)




        # If no C13, reduce fit peaks to 2.
        elif np.isnan(C13_pos)==True or config1.C13_prom<10:
            fit_peaks=2



    # First, we feed data into the remove baseline function, which returns corrected data

    y_corr_diad2, Py_base_diad2, x_diad2,  Diad_short_diad2, Py_base_diad2, Pf_baseline_diad2,  Baseline_ysub_diad2, Baseline_x_diad2, Baseline_diad2, span_diad2=remove_diad_baseline(
   path=path, filename=filename, filetype=filetype, exclude_range1=config2.exclude_range1, exclude_range2=config2.exclude_range2, N_poly=config1.N_poly_bck_diad2,
    lower_bck=config1.lower_bck_diad2, upper_bck=config1.upper_bck_diad2, plot_figure=False, diad_array=diad_array)





    # Then, we feed this baseline-corrected data into the combined gaussian-voigt peak fitting function
    result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange,  xdat_inrange=fit_gaussian_voigt_generic_diad(config1, diad2=True, path=path, filename=filename,
    xdat=x_diad2, ydat=y_corr_diad2,
    span=span_diad2, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, C13_pos=C13_pos, fit_peaks=fit_peaks,
    py_baseline=Py_base_diad2, minimise=minimise)

    if any(df_out['Diad2_refit'].str.contains('Below threshold intensity')):
        print('below your threshold intensity, we have filled with nans')


    else:

        # Try Refitting once
        if str(df_out['Diad2_refit'].iloc[0])!='Flagged Warnings:':
            print('refit attempt 1')
            factor=2

            import copy
            config_tweaked=config1
            config_tweaked=copy.copy(config1)

            if any(df_out['Diad2_refit'].str.contains(' No Error')):
                if fit_peaks>1:
                    fit_peaks=fit_peaks-1




            # if any(df_out['Diad2_refit'].str.contains('V_HighAmp')):
            #     config_tweaked.diad_amplitude=calc_diad_amplitude/10
            if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_Low')):
                config_tweaked.diad_sigma=config1.diad_sigma*factor
            if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_High')):
                config_tweaked.diad_sigma=config1.diad_sigma/factor
            # Gaussian fit bits
            if any(df_out['Diad2_refit'].str.contains('G_input_TooHighSigma')):
                config_tweaked.gauss_sigma=config1.gauss_sigma/factor
            if any(df_out['Diad2_refit'].str.contains('G_input_TooLowSigma')):
                config_tweaked.gauss_sigma=config1.gauss_sigma*factor
            if any(df_out['Diad2_refit'].str.contains('G_input_TooLowAmp')):
                config_tweaked.gauss_amplitude=config1.gauss_amp*factor
            if any(df_out['Diad2_refit'].str.contains('G_Amp_too_high')):
                config_tweaked.gauss_amplitude=config1.gauss_amp/factor
            if any(df_out['Diad2_refit'].str.contains('G_higher_than_diad')):
                config_tweaked.gauss_amplitude=config1.gauss_amp/factor
                #config_tweaked.gauss_sigma=config1.gauss_sigma*factor
            # Hot band fit bits
            # if any(df_out['Diad2_refit'].str.contains('HB2_LowAmp')):
            #     config_tweaked.HB_amplitude=calc_HB_amplitude/2
            # if any(df_out['Diad2_refit'].str.contains('HB2_HighAmp')):
            #     config_tweaked.HB_amplitude=calc_HB_amplitude*2


            result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange,  xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked, diad2=True, path=path, filename=filename,
            xdat=x_diad2, ydat=y_corr_diad2,
        span=span_diad2, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, C13_pos=C13_pos, fit_peaks=fit_peaks,
        py_baseline=Py_base_diad2, minimise=minimise)
            i=2
            while str(df_out['Diad2_refit'].iloc[0])!='Flagged Warnings:':

                print('refit attempt  ='+str(i) + ', '+str(df_out['Diad2_refit'].iloc[0]))
                print(df_out['Diad2_refit'].iloc[0])


                config_tweaked2=copy.copy(config_tweaked)
                factor2=factor*2

                #
                # if any(df_out['Diad2_refit'].str.contains('V_LowAmp')):
                #     config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude*10
                # if any(df_out['Diad2_refit'].str.contains('V_HighAmp')):
                #     config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude/10
                if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_Low')):
                    config_tweaked2.diad_sigma=config_tweaked.diad_sigma*factor2
                if any(df_out['Diad2_refit'].str.contains('V_Sigma_too_High')):
                    config_tweaked2.diad_sigma=config_tweaked.diad_sigma/factor2
                # Gaussian fit bits
                if any(df_out['Diad2_refit'].str.contains('G_input_TooHighSigma')):
                    config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma/factor2
                if any(df_out['Diad2_refit'].str.contains('G_input_TooLowSigma')):
                    config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2
                if any(df_out['Diad2_refit'].str.contains('G_input_TooLowAmp')):
                    config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp*factor2
                if any(df_out['Diad2_refit'].str.contains('G_Amp_too_high')):
                    config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2
                if any(df_out['Diad2_refit'].str.contains('G_higher_than_diad')):
                    config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2
                    #config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2
            # # Hot bands bits
            #     if any(df_out['Diad2_refit'].str.contains('HB2_LowAmp')):
            #         config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude/2
            #     if any(df_out['Diad2_refit'].str.contains('HB2_HighAmp')):
            #         config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude*2


                result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange,  xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked2, diad2=True, path=path, filename=filename,
            xdat=x_diad2, ydat=y_corr_diad2, span=span_diad2, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, C13_pos=C13_pos, fit_peaks=fit_peaks, py_baseline=Py_base_diad2,  minimise=minimise)
                i=i+1
                if i>5:
                    print('Got to 5 iteratoins and still couldnt adjust the fit parameters')
                    break


    # get a best fit to the baseline using a linspace from the peak fitting
    ybase_xlin=Pf_baseline_diad2(x_lin)

    # We extract the full spectra to plot at the end, and convert to a dataframe
    #Spectra_df=get_data(path=path, filename=filename, filetype=filetype)
    Spectra=Diad



    # Make nice figure
    if plot_figure is True:

        figure_mosaic="""
        XY
        AB
        CD
        EE
        """

        fig,axes=plt.subplot_mosaic(mosaic=figure_mosaic, figsize=(12, 16))
        if df_out['Diad2_refit'] is not False:
            fig.suptitle('Diad 2, file= '+ str(filename) + ' \n' + str(df_out['Diad2_refit'].iloc[0]), fontsize=16, x=0.5, y=1.0)
        else:
            fig.suptitle('Diad 2, file= '+ str(filename), fontsize=16, x=0.5, y=1.0)
        # Background plot for real

        # Plot best fit on the LHS, and individual fits on the RHS at the top

        axes['X'].set_title('a) Background fit')
        axes['X'].plot(Diad[:, 0], Diad[:, 1], '-', color='grey')
        axes['X'].plot(Diad_short_diad2[:, 0], Diad_short_diad2[:, 1], '-r', label='Spectra')

        #axes['X'].plot(Baseline[:, 0], Baseline[:, 1], '-b', label='Bck points')
        axes['X'].plot(Baseline_diad2[:, 0], Baseline_diad2[:, 1], '.b', label='bel. Bck. pts')
        axes['X'].plot(Diad_short_diad2[:, 0], Py_base_diad2, '-k', label='bck. poly fit')



        ax1_ymin=np.min(Baseline_diad2[:, 1])-10*np.std(Baseline_diad2[:, 1])
        ax1_ymax=np.max(Baseline_diad2[:, 1])+10*np.std(Baseline_diad2[:, 1])
        ax1_xmin=config1.lower_bck_diad2[0]-30
        ax1_xmax=config1.upper_bck_diad2[1]+30
        # Adding patches


        rect_diad2_b1=patches.Rectangle((config1.lower_bck_diad2[0], ax1_ymin),config1.lower_bck_diad2[1]-config1.lower_bck_diad2[0],ax1_ymax-ax1_ymin,
                                linewidth=1,edgecolor='none',facecolor='cyan', label='sel. bck. region', alpha=0.3, zorder=0)
        axes['X'].add_patch(rect_diad2_b1)
        rect_diad2_b2=patches.Rectangle((config1.upper_bck_diad2[0], ax1_ymin),config1.upper_bck_diad2[1]-config1.upper_bck_diad2[0],ax1_ymax-ax1_ymin,
                                linewidth=1,edgecolor='none',facecolor='cyan', alpha=0.3, zorder=0)
        axes['X'].add_patch(rect_diad2_b2)
        axes['X'].set_xlim([ax1_xmin, ax1_xmax])
        axes['X'].set_ylim([ax1_ymin, ax1_ymax])

        axes['X'].set_ylabel('Intensity')
        axes['Y'].set_ylabel('Intensity')
        axes['Y'].set_xlabel('Wavenumber')
        axes['X'].legend()



        axes['Y'].set_title('b) Background subtracted spectra')
        axes['Y'].plot(x_diad2, y_corr_diad2, '-r')
        height_p=np.max(Diad_short_diad2[:, 1])-np.min(Diad_short_diad2[:, 1])
        axes['Y'].set_ylim([np.min(y_corr_diad2), 1.2*height_p ])
        axes['X'].set_xlabel('Wavenumber')



        axes['A'].plot(xdat, ydat,  '.k', label='bck. sub. data')
        axes['A'].plot( x_lin ,y_best_fit, '-g', linewidth=1, label='best fit')
        axes['A'].legend()
        axes['A'].set_ylabel('Intensity')
        axes['A'].set_xlabel('Wavenumber')
        axes['A'].set_xlim(ax1_xlim)
        axes['A'].set_title('c) Overall Best Fit')

    # individual fits
        axes['B'].plot(xdat, ydat, '.k')

        # This is for if there is more than 1 peak, this is when we want to plot the best fit

        if fit_peaks>1:


            axes['B'].plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='HB2')

        axes['B'].plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Diad2')
        if config1.fit_gauss is not False:
            axes['B'].plot(x_lin, components.get('bkg_'), '-m', label='Gaussian bck', linewidth=2)
        #ax2.plot(xdat, result.best_fit, '-g', label='best fit')
        axes['B'].legend()

        fitspan=max(y_best_fit)-min(y_best_fit)
        axes['B'].set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5])

        axes['B'].set_ylabel('Intensity')
        axes['B'].set_xlabel('Wavenumber')


        axes['B'].set_xlim(ax2_xlim)

        # Dashed lines so matches part D

        axes['B'].plot([df_out['Diad2_Voigt_Cent'], df_out['Diad2_Voigt_Cent']], [np.min(ydat), np.max(ydat)], ':b')


        if fit_peaks>=2:
                axes['B'].plot([df_out['HB2_Cent'], df_out['HB2_Cent']], [np.min(ydat), np.max(ydat)], ':r')

        axes['B'].set_title('c) Fit Components')

        # Background fit

        # First, set up x and y limits on axis

        if config1.x_range_baseline is not None:
            axc_xmin=df_out['Diad2_Voigt_Cent'][0]-config1.x_range_baseline
            axc_xmax=df_out['Diad2_Voigt_Cent'][0]+config1.x_range_baseline
        else:
            axc_xmin=config1.lower_bck_diad2[0]
            axc_xmax=config1.upper_bck_diad2[1]
        axc_ymin=np.min(Baseline_diad2[:, 1])-config1.y_range_baseline/3
        axc_ymax=np.max(Baseline_diad2[:, 1])+config1.y_range_baseline

        rect_diad2_b1=patches.Rectangle((config1.lower_bck_diad2[0],axc_ymin),config1.lower_bck_diad2[1]-config1.lower_bck_diad2[0],axc_ymax-axc_ymin,
                                linewidth=1,edgecolor='none',facecolor='cyan', label='bck', alpha=0.3, zorder=0)



        axes['C'].set_title('d) Peaks overlain on data before subtraction')
        axes['C'].plot(Diad_short_diad2[:, 0], Diad_short_diad2[:, 1], '.r', label='data')
        axes['C'].plot(Baseline_diad2[:, 0], Baseline_diad2[:, 1], '.b', label='bck')
        axes['C'].plot(Diad_short_diad2[:, 0], Py_base_diad2, '-k', label='Poly bck fit')


        axes['C'].set_ylabel('Intensity')
        axes['C'].set_xlabel('Wavenumber')


        if config1.fit_gauss is not False:

            axes['C'].plot(x_lin, components.get('bkg_')+ybase_xlin, '-m', label='Gaussian bck', linewidth=2)

        axes['C'].plot( x_lin ,y_best_fit+ybase_xlin, '-g', linewidth=2, label='Best Fit')
        axes['C'].plot(x_lin, components.get('lz1_')+ybase_xlin, '-b', label='Diad2', linewidth=1)
        if fit_peaks>1:
            axes['C'].plot(x_lin, components.get('lz2_')+ybase_xlin, '-r', label='HB2', linewidth=1)
        if fit_peaks>2:
            axes['C'].plot(x_lin, components.get('lz3_')+ybase_xlin, '-c', label='C13', linewidth=1)

        axes['C'].legend(ncol=3, loc='lower center')



        axes['C'].set_xlim([axc_xmin, axc_xmax])
        axes['C'].set_ylim([axc_ymin, axc_ymax])
        #axes['C'].plot(Diad_short[:, 0], Diad_short[:, 1], '"r', label='Data')


        # Residual on plot D
        axes['D'].set_title('f) Residuals')
        axes['D'].plot([df_out['Diad2_Voigt_Cent'], df_out['Diad2_Voigt_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':b')
        if fit_peaks>1:
                axes['D'].plot([df_out['HB2_Cent'], df_out['HB2_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':r')

        axes['D'].plot(xdat_inrange, residual_diad_coords, 'ok', mfc='c' )
        axes['D'].plot(xdat_inrange, residual_diad_coords, '-c' )
        axes['D'].set_ylabel('Residual')
        axes['D'].set_xlabel('Wavenumber')
        # axes['D'].set_xlim(ax1_xlim)
        # axes['D'].set_xlim(ax2_xlim)
        Local_Residual_diad2=residual_diad_coords[((xdat_inrange>(df_out['Diad2_Voigt_Cent'][0]-config1.x_range_residual))
                                                &(xdat_inrange<df_out['Diad2_Voigt_Cent'][0]+config1.x_range_residual))]
        axes['D'].set_xlim([df_out['Diad2_Voigt_Cent'][0]-config1.x_range_residual,
                    df_out['Diad2_Voigt_Cent'][0]+config1.x_range_residual])
        #ax5.plot([cent_1117, cent_1117 ], [np.min(Local_Residual_1117)-10, np.max(Local_Residual_1117)+10], ':k')
        axes['D'].set_ylim([np.min(Local_Residual_diad2)-10, np.max(Local_Residual_diad2)+10])





        # Overal spectra
        axes['E'].set_title('g) Summary plot of raw spectra for file = ' + filename)
        axes['E'].plot(Spectra[:, 0], Spectra[:, 1], '-r')
        axes['E'].set_ylabel('Intensity')
        axes['E'].set_xlabel('Wavenumber')




        path3=path+'/'+'diad_fit_images'
        if os.path.exists(path3):
            out='path exists'
        else:
            os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False)

        fig.tight_layout()

        file=filename.rsplit('.txt', 1)[0]
        fig.savefig(path3+'/'+'diad2_Fit_{}.png'.format(file), dpi=config1.dpi)


    if close_figure is True:
        plt.close(fig)

    # Lets calculate peak skewness here.
    Skew50=assess_diad2_skewness(config1=diad2_fit_config(), int_cut_off=0.5, path=path, filename=filename, filetype=filetype,
    skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array)
    Skew80=assess_diad2_skewness(config1=diad2_fit_config(), int_cut_off=0.3, path=path, filename=filename, filetype=filetype,
    skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array)
    df_out['Diad2_Asym50']=Skew50['Skewness_diad2']
    df_out['Diad2_Asym70']=Skew80['Skewness_diad2']
    df_out['Diad2_Yuan2017_sym_factor']=(df_out['Diad2_fwhm'])*(df_out['Diad2_Asym50']-1)
    df_out['Diad2_Remigi2021_BSF']=df_out['Diad2_fwhm']/df_out['Diad2_Combofit_Height']
    df_out['Diad2_PDF_Model']=model
    df_out = df_out.rename(columns={'reduced_chi_squared': 'Diad2_reduced_chi_squared'})


    if  config1.return_other_params is False:
        return df_out
    else:
        return df_out, result, y_best_fit, x_lin






[docs]
def fit_diad_1_w_bck(*, config1: diad1_fit_config=diad1_fit_config(), config2: diad_id_config=diad_id_config(),
    path=None, filename=None, filetype=None,  plot_figure=True, close_figure=True, Diad_pos=None, HB_pos=None, diad_array=None, minimise='weighted_least_squares'):
    """ This function fits the background (using the function remove_diad_baseline) and then fits the peaks
    using fit_gaussian_voigt_generic_diad()
    It then checks if any parameters are right at the permitted edge (meaning fitting didnt converge),
    and tries fitting if so. Then it makes a plot
    of the various parts of the fitting procedure, and returns a dataframe of the fit parameters.


    Parameters
    -----------
    diad_id_config()
        Dataclass of parameters used to look for peaks. Uses any excluded ranges from this.

    diad1_fit_config()
        Dataclass of parameters used to find peaks. Use help on this dataclass to get options


    path: str
        Folder user wishes to read data from

    filename: str
        Specific name of file being used.

    filetype: str
        Identifies type of file. choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    plot_figure: bool
        if True, saves figure

    Diad_pos: float
        Estimate of diad position

    HB_pos: float
        Estimate of HB position


    close_fig: bool
        if True, displays figure in Notebook. If False, closes it (you can still find it saved)

    minimise: str
        'weighted_least_squares' or 'least_squares'

    Returns
    ------------
    if return_other_params is False (default)
        returns a dataframe of peak parameters
    if True, also returns:
            result: fit parameters
            y_best_fit: best fit to all curves in y
            x_lin: linspace of best fit coordinates in x.


"""

    if 'CRR' in filename:
        filetype='headless_txt'

    model=config1.model_name

    if model not in allowed_models:
        raise ValueError(f"Unsupported model: {model}. Supported models are: {', '.join(allowed_models)}")

    fit_peaks=config1.fit_peaks

    if fit_peaks==2:
        if np.isnan(HB_pos)==True or config1.HB_prom<-50:
            fit_peaks=1
            config1.fit_gauss=False
            #print('Either no hb position, or prominence<-50, using 1 fit')

    if diad_array is None:
        Diad_df=get_data(path=path, filename=filename, filetype=filetype)
        Diad=np.array(Diad_df)
    else:
        Diad=diad_array
    # First, we feed data into the remove baseline function, which returns corrected data

    y_corr_diad1, Py_base_diad1, x_diad1,  Diad_short_diad1, Py_base_diad1, Pf_baseline_diad1,  Baseline_ysub_diad1, Baseline_x_diad1, Baseline_diad1, span_diad1=remove_diad_baseline(
   path=path, filename=filename, filetype=filetype, exclude_range1=config2.exclude_range1, exclude_range2=config2.exclude_range2, N_poly=config1.N_poly_bck_diad1,
    lower_bck=config1.lower_bck_diad1, upper_bck=config1.upper_bck_diad1, plot_figure=False, diad_array=diad_array)





    # Then, we feed this baseline-corrected data into the combined gaussian-voigt peak fitting function

    result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange,  xdat_inrange=fit_gaussian_voigt_generic_diad(config1,diad1=True, path=path, filename=filename,
                    xdat=x_diad1, ydat=y_corr_diad1,
                    span=span_diad1, plot_figure=False,
                    Diad_pos=Diad_pos, HB_pos=HB_pos,fit_peaks=fit_peaks, py_baseline=Py_base_diad1,  minimise=minimise)



    # if df_out['Diad1_cent_err'].iloc[0]==0:
    #     if config1.fit_peaks>1:
    #         config_tweaked.fit_peaks=config1.fit_peaks-1
    #         print('reducing peaks by 1 to try to get an error')


    # Try Refitting once
    if str(df_out['Diad1_refit'].iloc[0])!='Flagged Warnings:':
        print('refit attempt 1')
        factor=2
        import copy
        config_tweaked=config1
        config_tweaked=copy.copy(config1)

        if any(df_out['Diad1_refit'].str.contains(' No Error')):
            if config1.fit_peaks>2:
                config_tweaked.fit_peaks=config1.fit_peaks-1
                fit_peaks=config_tweaked.fit_peaks


        if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_Low')):
            config_tweaked.diad_sigma=config1.diad_sigma*factor
        if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_High')):
            config_tweaked.diad_sigma=config1.diad_sigma/factor
        # Gaussian fit bits
        if any(df_out['Diad1_refit'].str.contains('G_input_TooHighSigma')):
            config_tweaked.gauss_sigma=config1.gauss_sigma/factor
        if any(df_out['Diad1_refit'].str.contains('G_input_TooLowSigma')):
            config_tweaked.gauss_sigma=config1.gauss_sigma*factor
        if any(df_out['Diad1_refit'].str.contains('G_input_TooLowAmp')):
            config_tweaked.gauss_amplitude=config1.gauss_amp*factor
        if any(df_out['Diad1_refit'].str.contains('G_Amp_too_high')):
            config_tweaked.gauss_amplitude=config1.gauss_amp/factor
        if any(df_out['Diad1_refit'].str.contains('G_higher_than_diad')):
            config_tweaked.gauss_amplitude=config1.gauss_amp/factor
            config_tweaked.gauss_sigma=config1.gauss_sigma*factor
        # # Hot band fit bits
        # if any(df_out['Diad1_refit'].str.contains('HB1_LowAmp')):
        #     config_tweaked.HB_amplitude=calc_HB_amplitude/2
        # if any(df_out['Diad1_refit'].str.contains('HB1_HighAmp')):
        #     config_tweaked.HB_amplitude=calc_HB_amplitude*2

        result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange,  xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked, diad1=True, path=path, filename=filename,
                    xdat=x_diad1, ydat=y_corr_diad1,
                    span=span_diad1, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, fit_peaks=config_tweaked.fit_peaks,
                    py_baseline=Py_base_diad1,  minimise=minimise)
        i=2
        while str(df_out['Diad1_refit'].iloc[0])!='Flagged Warnings:':

            print('refit attempt  ='+str(i) + ', '+str(df_out['Diad1_refit'].iloc[0]))
            print(df_out['Diad1_refit'].iloc[0])

            import copy
            config_tweaked2=copy.copy(config_tweaked)
            factor2=factor*2


            # if any(df_out['Diad1_refit'].str.contains('V_LowAmp')):
            #     config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude*10
            # if any(df_out['Diad1_refit'].str.contains('V_HighAmp')):
            #     config_tweaked2.diad_amplitude=config_tweaked.diad_amplitude/10
            if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_Low')):
                config_tweaked2.diad_sigma=config_tweaked.diad_sigma*factor2
            if any(df_out['Diad1_refit'].str.contains('V_Sigma_too_High')):
                config_tweaked2.diad_sigma=config_tweaked.diad_sigma/factor2
            # Gaussian fit bits
            if any(df_out['Diad1_refit'].str.contains('G_input_TooHighSigma')):
                config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma/factor2
            if any(df_out['Diad1_refit'].str.contains('G_input_TooLowSigma')):
                config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2
            if any(df_out['Diad1_refit'].str.contains('G_input_TooLowAmp')):
                config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp*factor2
            if any(df_out['Diad1_refit'].str.contains('G_Amp_too_high')):
                config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2
            if any(df_out['Diad1_refit'].str.contains('G_higher_than_diad')):
                config_tweaked2.gauss_amplitude=config_tweaked.gauss_amp/factor2
                config_tweaked2.gauss_sigma=config_tweaked.gauss_sigma*factor2
            # Hot bands bits
            # if any(df_out['Diad1_refit'].str.contains('HB1_LowAmp')):
            #     config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude/2
            # if any(df_out['Diad1_refit'].str.contains('HB1_HighAmp')):
            #     config_tweaked2.HB_amplitude=config_tweaked.HB_amplitude*2


            result, df_out, y_best_fit, x_lin, components, xdat, ydat, ax1_xlim, ax2_xlim,residual_diad_coords, ydat_inrange,  xdat_inrange=fit_gaussian_voigt_generic_diad(config_tweaked2,diad1=True, path=path, filename=filename,
                    xdat=x_diad1, ydat=y_corr_diad1,
                    span=span_diad1, plot_figure=False, Diad_pos=Diad_pos, HB_pos=HB_pos, fit_peaks=config_tweaked.fit_peaks,
                    py_baseline=Py_base_diad1,  minimise=minimise)
            i=i+1
            if i>5:
                print('Got to 5 iteratoins and still couldnt adjust the fit parameters')
                break






    # get a best fit to the baseline using a linspace from the peak fitting
    ybase_xlin=Pf_baseline_diad1(x_lin)

    # We extract the full spectra to plot at the end, and convert to a dataframe


    Spectra=Diad



    # Make nice figure
    if plot_figure is True:

        figure_mosaic="""
        XY
        AB
        CD
        EE
        """

        fig,axes=plt.subplot_mosaic(mosaic=figure_mosaic, figsize=(12, 16))
        fig.suptitle('Diad 1, file= '+ str(filename) + ' \n' + str(df_out['Diad1_refit'].iloc[0]), fontsize=16, x=0.5, y=1.0)

        # Background plot for rea

        # Plot best fit on the LHS, and individual fits on the RHS at the top

        axes['X'].set_title('a) Background fit')
        axes['X'].plot(Diad[:, 0], Diad[:, 1], '-', color='grey')
        axes['X'].plot(Diad_short_diad1[:, 0], Diad_short_diad1[:, 1], '-r', label='Spectra')

        #axes['X'].plot(Baseline[:, 0], Baseline[:, 1], '-b', label='Bck points')
        axes['X'].plot(Baseline_diad1[:, 0], Baseline_diad1[:, 1], '.b', label='Sel. bck. pts')
        axes['X'].plot(Diad_short_diad1[:, 0], Py_base_diad1, '-k', label='bck. poly fit')



        ax1_ymin=np.min(Baseline_diad1[:, 1])-10*np.std(Baseline_diad1[:, 1])
        ax1_ymax=np.max(Baseline_diad1[:, 1])+10*np.std(Baseline_diad1[:, 1])
        ax1_xmin=config1.lower_bck_diad1[0]-30
        ax1_xmax=config1.upper_bck_diad1[1]+30
        # Adding patches


        rect_diad1_b1=patches.Rectangle((config1.lower_bck_diad1[0], ax1_ymin),config1.lower_bck_diad1[1]-config1.lower_bck_diad1[0],ax1_ymax-ax1_ymin,
                                linewidth=1,edgecolor='none',facecolor='cyan', label='sel. bck. region', alpha=0.3, zorder=0)
        axes['X'].add_patch(rect_diad1_b1)
        rect_diad1_b2=patches.Rectangle((config1.upper_bck_diad1[0], ax1_ymin),config1.upper_bck_diad1[1]-config1.upper_bck_diad1[0],ax1_ymax-ax1_ymin,
                                linewidth=1,edgecolor='none',facecolor='cyan', alpha=0.3, zorder=0)
        axes['X'].add_patch(rect_diad1_b2)
        axes['X'].set_xlim([ax1_xmin, ax1_xmax])
        axes['X'].set_ylim([ax1_ymin, ax1_ymax])

        axes['X'].set_ylabel('Intensity')
        axes['Y'].set_ylabel('Intensity')
        axes['Y'].set_xlabel('Wavenumber')
        axes['X'].legend()



        axes['Y'].set_title('b) Background subtracted spectra')
        axes['Y'].plot(x_diad1, y_corr_diad1, '-r')
        height_p=np.max(Diad_short_diad1[:, 1])-np.min(Diad_short_diad1[:, 1])
        axes['Y'].set_ylim([np.min(y_corr_diad1), 1.2*height_p ])
        axes['X'].set_xlabel('Wavenumber')



        axes['A'].plot(xdat, ydat,  '.k', label='bck. sub. data')
        axes['A'].plot( x_lin ,y_best_fit, '-g', linewidth=1, label='best fit')
        axes['A'].legend()
        axes['A'].set_ylabel('Intensity')
        axes['A'].set_xlabel('Wavenumber')
        axes['A'].set_xlim(ax1_xlim)
        axes['A'].set_title('c) Overall Best Fit')

    # individual fits
        axes['B'].plot(xdat, ydat, '.k')

        # This is for if there is more than 1 peak, this is when we want to plot the best fit
        if fit_peaks>1:

            axes['B'].plot(x_lin, components.get('lz2_'), '-r', linewidth=2, label='HB1')

        axes['B'].plot(x_lin, components.get('lz1_'), '-b', linewidth=2, label='Diad1')
        if config1.fit_gauss is not False:
            axes['B'].plot(x_lin, components.get('bkg_'), '-m', label='Gaussian bck', linewidth=2)
        #ax2.plot(xdat, result.best_fit, '-g', label='best fit')
        axes['B'].legend()

        fitspan=max(y_best_fit)-min(y_best_fit)
        axes['B'].set_ylim([min(y_best_fit)-fitspan/5, max(y_best_fit)+fitspan/5])

        axes['B'].set_ylabel('Intensity')
        axes['B'].set_xlabel('Wavenumber')


        axes['B'].set_xlim(ax2_xlim)

        # Dashed lines so matches part D
        axes['B'].plot([df_out['Diad1_Voigt_Cent'], df_out['Diad1_Voigt_Cent']], [np.min(ydat), np.max(ydat)], ':b')

        if fit_peaks>2:
            axes['B'].plot([df_out['HB1_Cent'], df_out['HB1_Cent']], [np.min(ydat), np.max(ydat)], ':r')

        axes['B'].set_title('c) Fit Components')

        # Background fit

        # First, set up x and y limits on axis

        if config1.x_range_baseline is not None:
            axc_xmin=df_out['Diad1_Voigt_Cent'][0]-config1.x_range_baseline
            axc_xmax=df_out['Diad1_Voigt_Cent'][0]+config1.x_range_baseline
        else:
            axc_xmin=config1.lower_bck_diad1[0]
            axc_xmax=config1.upper_bck_diad1[1]

        axc_ymin=np.min(Baseline_diad1[:, 1])-config1.y_range_baseline
        axc_ymax=np.max(Baseline_diad1[:, 1])+config1.y_range_baseline




        axes['C'].set_title('d) Peaks overlain on data before subtraction')
        axes['C'].plot(Diad_short_diad1[:, 0], Diad_short_diad1[:, 1], '.r', label='Data')
        axes['C'].plot(Baseline_diad1[:, 0], Baseline_diad1[:, 1], '.b', label='bck')
        axes['C'].plot(Diad_short_diad1[:, 0], Py_base_diad1, '-k', label='Poly bck fit')


        axes['C'].set_ylabel('Intensity')
        axes['C'].set_xlabel('Wavenumber')


        if config1.fit_gauss is not False:

            axes['C'].plot(x_lin, components.get('bkg_')+ybase_xlin, '-m', label='Gaussian bck', linewidth=2)

        axes['C'].plot( x_lin ,y_best_fit+ybase_xlin, '-g', linewidth=2, label='Best Fit')
        axes['C'].plot(x_lin, components.get('lz1_')+ybase_xlin, '-b', label='Diad1', linewidth=1)

        if fit_peaks>1:
            axes['C'].plot(x_lin, components.get('lz2_')+ybase_xlin, '-r', label='HB1', linewidth=1)




        axes['C'].legend(ncol=3, loc='lower center')
        axes['C'].set_xlim([axc_xmin, axc_xmax])
        axes['C'].set_ylim([axc_ymin, axc_ymax])
        #axes['C'].plot(Diad_short[:, 0], Diad_short[:, 1], '"r', label='Data')


        # Residual on plot D
        axes['D'].set_title('f) Residuals')
        axes['D'].plot([df_out['Diad1_Voigt_Cent'], df_out['Diad1_Voigt_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':b')
        if fit_peaks>2:
            axes['D'].plot([df_out['HB1_Cent'], df_out['HB1_Cent']], [np.min(residual_diad_coords), np.max(residual_diad_coords)], ':r')

        axes['D'].plot(xdat_inrange, residual_diad_coords, 'ok', mfc='c' )
        axes['D'].plot(xdat_inrange, residual_diad_coords, '-c' )
        axes['D'].set_ylabel('Residual')
        axes['D'].set_xlabel('Wavenumber')
        # axes['D'].set_xlim(ax1_xlim)
        # axes['D'].set_xlim(ax2_xlim)
        Local_Residual_diad1=residual_diad_coords[((xdat_inrange>(df_out['Diad1_Voigt_Cent'][0]-config1.x_range_residual))
                                                &(xdat_inrange<df_out['Diad1_Voigt_Cent'][0]+config1.x_range_residual))]
        axes['D'].set_xlim([df_out['Diad1_Voigt_Cent'][0]-config1.x_range_residual,
                    df_out['Diad1_Voigt_Cent'][0]+config1.x_range_residual])
        #ax5.plot([cent_1117, cent_1117 ], [np.min(Local_Residual_1117)-10, np.max(Local_Residual_1117)+10], ':k')



        axes['D'].set_ylim([np.min(Local_Residual_diad1)-10, np.max(Local_Residual_diad1)+10])





        # Overal spectra
        axes['E'].set_title('g) Summary plot of raw spectra for file = ' + filename)
        axes['E'].plot(Spectra[:, 0], Spectra[:, 1], '-r')
        axes['E'].set_ylabel('Intensity')
        axes['E'].set_xlabel('Wavenumber')


        path3=path+'/'+'diad_fit_images'
        if os.path.exists(path3):
            out='path exists'
        else:
            os.makedirs(path+'/'+ 'diad_fit_images', exist_ok=False)

        fig.tight_layout()

        file=filename.rsplit('.txt', 1)[0]
        fig.savefig(path3+'/'+'Diad1_Fit_{}.png'.format(file), dpi=config1.dpi)

        if close_figure is True:
            plt.close(fig)

    # Lets calculate peak skewness here.
    Skew50=assess_diad1_skewness(config1=diad1_fit_config(), int_cut_off=0.5, path=path, filename=filename, filetype=filetype,
    skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array)
    Skew80=assess_diad1_skewness(config1=diad1_fit_config(), int_cut_off=0.3, path=path, filename=filename, filetype=filetype,
    skewness='abs', height=1, prominence=5, width=0.5, plot_figure=False, diad_array=diad_array)

    df_out['Diad1_Asym50']=Skew50['Skewness_diad1']
    df_out['Diad1_Asym70']=Skew80['Skewness_diad1']
    df_out['Diad1_Yuan2017_sym_factor']=(df_out['Diad1_fwhm'])*(df_out['Diad1_Asym50']-1)
    df_out['Diad1_Remigi2021_BSF']=df_out['Diad1_fwhm']/df_out['Diad1_Combofit_Height']


    df_out['Diad1_PDF_Model']=model
    df_out = df_out.rename(columns={'reduced_chi_squared': 'Diad1_reduced_chi_squared'})

    if  config1.return_other_params is False:
        return df_out
    else:
        return df_out, result, y_best_fit, x_lin




[docs]
def combine_diad_outputs(*, filename=None, prefix=True,
Diad1_fit=None, Diad2_fit=None, to_csv=False,
to_clipboard=False, path=None):
    """ This function combines the dataframes from fit_diad_2_w_bck and fit_diad_1_w_bck into a
    single dataframe

    Parameters
    ---------------

    filename: str
        filename data is for

    prefix: bool
        if True, splits filename on ' ', if False, leaves as is

    Diad1_fit: pd.DataFrame
        pd.DataFrame of fit from fit_diad_1_w_bck function

    Diad2_fit: pd.DataFrame
        pd.DataFrame of fit from fit_diad_2_w_bck function

    to_clipboard: bool
        If True, copies merged dataframe to clipboard

    to_csv: bool
        if True, saves fit to Csv, by making a subfolder 'Diad_Fits' inside the folder 'path'




    """

    if prefix is True:
        filename = filename.split(' ', 1)[1]

    if Diad1_fit is not None and Diad2_fit is not None:

        combo=pd.concat([Diad1_fit, Diad2_fit], axis=1)
        # Fill any columns which dont' exist so can re-order the same every time
        if 'HB1_Cent' not in combo.columns:
            combo['HB1_Cent']=np.nan
            combo['HB1_Area']=np.nan
            combo['HB1_Sigma']=np.nan
        if 'HB2_Cent' not in combo.columns:
            combo['HB2_Cent']=np.nan
            combo['HB2_Area']=np.nan
            combo['HB2_Sigma']=np.nan
        if 'C13_Cent' not in combo.columns:
            combo['C13_Cent']=np.nan
            combo['C13_Area']=np.nan
            combo['C13_Sigma']=np.nan
        if 'Diad1_Gauss_Cent' not in combo.columns:
            combo['Diad1_Gauss_Cent']=np.nan
            combo['Diad1_Gauss_Area']=np.nan
            combo['Diad1_Gauss_Sigma']=np.nan
        if 'Diad2_Gauss_Cent' not in combo.columns:
            combo['Diad2_Gauss_Cent']=np.nan
            combo['Diad2_Gauss_Area']=np.nan
            combo['Diad2_Gauss_Sigma']=np.nan


        combo['Splitting']=combo['Diad2_Voigt_Cent']-combo['Diad1_Voigt_Cent']
        #combo['Split_err_abs']=combo['Diad1_cent_err']+combo['Diad2_cent_err']
        combo['Split_σ']=(combo['Diad1_cent_err']**2+combo['Diad2_cent_err']**2)**0.5
        cols_to_move = ['Splitting', 'Split_σ',  'Diad1_Combofit_Cent', 'Diad1_cent_err', 'Diad1_Combofit_Height', 'Diad1_Voigt_Cent', 'Diad1_Voigt_Area', 'Diad1_Voigt_Sigma',  'Diad1_Residual', 'Diad1_Prop_Lor', 'Diad1_fwhm', 'Diad1_refit','Diad2_Combofit_Cent', 'Diad2_cent_err',  'Diad2_Combofit_Height', 'Diad2_Voigt_Cent', 'Diad2_Voigt_Area', 'Diad2_Voigt_Sigma', 'Diad2_Voigt_Gamma', 'Diad2_Residual', 'Diad2_Prop_Lor', 'Diad2_fwhm', 'Diad2_refit',
                    'HB1_Cent', 'HB1_Area', 'HB1_Sigma', 'HB2_Cent', 'HB2_Area', 'HB2_Sigma', 'C13_Cent', 'C13_Area', 'C13_Sigma',
                    'Diad2_Gauss_Cent', 'Diad2_Gauss_Area','Diad2_Gauss_Sigma', 'Diad1_Gauss_Cent', 'Diad1_Gauss_Area','Diad1_Gauss_Sigma', 'Diad1_Asym50', 'Diad1_Asym70', 'Diad1_Yuan2017_sym_factor',
'Diad1_Remigi2021_BSF','Diad2_Asym50', 'Diad2_Asym70', 'Diad2_Yuan2017_sym_factor',
'Diad2_Remigi2021_BSF', 'Diad1_PDF_Model', 'Diad2_PDF_Model', 'Diad1_reduced_chi_squared', 'Diad2_reduced_chi_squared']




        combo_f = combo[cols_to_move + [
                col for col in combo.columns if col not in cols_to_move]]
        combo_f=combo_f.iloc[:, 0:len(cols_to_move)]
        file=filename.rsplit('.txt', 1)[0]
        combo_f.insert(0, 'filename', file)

    if Diad1_fit is None and Diad2_fit is None:
        df=pd.DataFrame(data={'filename': filename,
                            'Splitting': np.nan,
                                'Diad1_Voigt_Cent':np.nan,
                                    'Diad1_Voigt_Area':np.nan,
                                    'Residual_Diad1': np.nan,
                                        'Diad2_Voigt_Cent':np.nan,
                                        'Diad2_Voigt_Area':np.nan,
                                        'Residual_Diad2': np.nan,
                                            'HB1_Cent':np.nan,
                                                'HB1_Area':np.nan,
                                                'HB2_Cent':np.nan,
                                                    'HB2_Area':np.nan,
                                                        'C13_Cent': np.nan,
                                                        'C13_Area': np.nan,
                                                        })
        combo_f=pd.concat([df, Carb_fit], axis=1)
        if to_clipboard is True:
            df.to_clipboard(excel=True, header=False, index=False)
        combo_f=df

    if to_csv is True:
        if path is None:
            raise Exception('You need to specify path= for wherever you want this saved')
        path_fits=path+'/'+'Diad_Fits'
        if os.path.exists(path_fits):
            out='path exists'
        else:
            dir2=os.makedirs(path+'/'+ 'Diad_Fits', exist_ok=False)
        #filepath=Path(path+'/'+ 'Peak_Fits'+'/'+filename)
        combo_f.to_csv(path+'/'+'Diad_Fits'+'/'+'fits_'+filename)
    return combo_f






[docs]
def calculate_HB_Diad_area_ratio(df):
    """" Calculates two area ratios of hotbands and diads"""
    df_c=df.copy()
    df_c['HB_Diad_Ratio']=(df_c['HB1_Area']+df_c['HB2_Area'])/(df_c['Diad1_Voigt_Area']+df_c['Diad2_Voigt_Area'])
    df_c['HB_Total_Ratio']=(df_c['HB1_Area']+df_c['HB2_Area'])/(df_c['Diad1_Voigt_Area']+df_c['Diad2_Voigt_Area']+df_c['HB1_Area']+df_c['HB2_Area'])
    return df_c







## Fit generic peak


[docs]
@dataclass
class generic_peak_config:
    """
    Configuration class for fitting secondary peaks (e.g. SO2, Carbonate)

    Attributes:
        name (str): Default 'Generic'
            Name of component you are fitting that gets stamped onto the peak parameters, e.g. if 'SO2', get Peak_Area_SO2'

        lower_bck, upper_bck: Tuple[float, float]
            Background positions in wavenumbers

        model_name: Default 'PseudoVoigt'
            Type of peak fit. Choose from 'GaussianModel', 'VoigtModel', 'PseudoVoigtModel', 'Spline'.

        x_range_bck: float (default 10)
            When showing bck collected plot, shows peak center +- this amount

        N_poly_carb_bck: float (default 1)
            Degree of polynomial to fit to background.

        amplitude: float (default 100)
            Approximate amplitude of peak fit (first guess for Gaussian , voigt and Psuedovoigt needed)

        cent: float (default 1090)
            Approximate peak position of species you are looking for. E.g. 1090 for CO2, 1150 for SO2

        outlier_sigma: float (default 12)
            Code calculates standard deviation of background, and excludes points more than outlier_sigma*this standard deviatoin from nearby background points

        distance, prominence, width, threshold, height: float
            Scipy find peak parameters to get initial fit of peak




        fit_peaks (int): Default 2
            Number of peaks to fit. 2 = Diad + HB, 1 = Diad.

    """

    # Name that gets stamped onto fits
    name: str= 'generic'
    # Selecting the two background positions
    lower_bck: Tuple[float, float]=(1060, 1065)
    upper_bck: Tuple[float, float]=(1120, 1130)

    #
    model_name: str='PseudoVoigtModel'
    x_range_bck: float=10

    # Background degree of polynomial
    N_poly_carb_bck: float =1
    # Seletcting the amplitude
    amplitude: float =1000

    # Selecting the peak center
    cent: float =1090

    # outlier sigma to discard background at
    outlier_sigma: float =12



    # Parameters for Scipy find peaks
    distance: float=10
    prominence: float=5
    width: float=6
    threshold: float=0.1
    height:float =100

    # Excluding a range for cosmic rays etc.
    exclude_range: Optional [Tuple[float, float]]=None


    # Return other parameteres e.g. intermediate outputs
    return_other_params: bool = False

    N_peaks: int= 1

    int_cut_off: float=0.05







[docs]
def fit_generic_peak(*, config: generic_peak_config=generic_peak_config(),
path=None, filename=None, filetype=None,
 plot_figure=True, dpi=200):

    """ This function fits a generic peak with various options

    config: from dataclass generic_peak_config

        model_name: str, 'GaussianModel', 'VoigtModel', 'PseudoVoigtModel', 'Spline', 'Poly',
            Fits Gaussian, Voigt, or PseudoVoigt, or fits a spline or polynomail and gets area underneath

        name: str
            Name of feature you are fitting. Used to make column headings in output (e.g., if SO2, outputs are Peak_Cent_SO2, Peak_Area_SO2)

        int_cut_off: float
            fits peak out to int_cut_off* peak height.

        lower_bkc: Tuple[float, float]
            Upper and lower x coordinate for background to left of peak

        upper_bkc: Tuple[float, float]
            Upper and lower x coordinate for background to right of peak

        x_range_bck: float
            sets xlim of figure, as the peak center + x_range_bck, and peak cent - x_range_bck

        N_poly_carb_bck: int
            Degree of polynomial to fit to the background positions

        amplitude: float
            Approx amplitude of peak.

        cent: float
            Approx cent of peak

        outlier_sigma: float
            Discards points on the background outside outlier_sigma of the median value

        distance, prominence, threshold, height: floats
            parameters for scipy find_peaks

        exclude_range: Optional [Tuple[float, float]]
            Excludes region of spectra between these values

        dpi: float
            dpi to save figure at

        return_other_params: bool
            if True, returns other peak fit parameters and fits (df, xx_carb, y_carb, result0),
            else just returns df of peak positions, heights etc


    path: str
        Path to file

    filename: str
        filename with file extension

    filetype: str
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'


    plot_figure: bool
        If true, plots figure and saves it in a subfolder in the same directory as the filepath specified

    dpi: int
        dpi to save figure at

    Returns
    -----------
    df of peak fits, unless return_other_params is True, then also returns x and y coordinate of fit, and fit params

    """


    Spectra_in=get_data(path=path, filename=filename, filetype=filetype)

    int_cut_off=config.int_cut_off
    name=config.name

    # If exclude range, trim that here
    if config.exclude_range is not None:
            Spectra=Spectra_in[ (Spectra_in[:, 0]<config.exclude_range[0]) | (Spectra_in[:, 0]>config.exclude_range[1]) ]
    else:
        Spectra=Spectra_in



    lower_0baseline=config.lower_bck[0]
    upper_0baseline=config.lower_bck[1]
    lower_1baseline=config.upper_bck[0]
    upper_1baseline=config.upper_bck[1]

    # Filter out spectra outside these baselines
    Spectra_short=Spectra[ (Spectra[:,0]>lower_0baseline) & (Spectra[:,0]<upper_1baseline) ]
    Spectra_fit=Spectra[ (Spectra[:,0]>upper_0baseline) & (Spectra[:,0]<lower_1baseline) ]

    # To make a nice plot, give 50 wavenumber units on either side as a buffer
    Spectra_plot=Spectra[ (Spectra[:,0]>lower_0baseline-50) & (Spectra[:,0]<upper_1baseline+50) ]

    # # Find peaks using Scipy find peaks
    y=Spectra_fit[:, 1]
    x=Spectra_fit[:, 0]
    spec_res=np.abs(x[0]-x[1])
    #
    #
    # peaks = find_peaks(y,height = config.height, threshold = config.threshold,
    # distance = config.distance, prominence=config.prominence, width=config.width)
    #
    # height = peaks[1]['peak_heights'] #list of the heights of the peaks
    # peak_pos = x[peaks[0]] #list of the peaks positions
    # df_sort=pd.DataFrame(data={'pos': peak_pos,
    #                     'height': height})
    #
    # df_peak_sort=df_sort.sort_values('height', axis=0, ascending=False)
    #
    # # Trim number of peaks based on user-defined N peaks
    # df_peak_sort_short=df_peak_sort[0:config.N_peaks]


    # Get actual baseline
    Baseline_with_outl=Spectra_short[
    ((Spectra_short[:, 0]<upper_0baseline) &(Spectra_short[:, 0]>lower_0baseline))
        |
    ((Spectra_short[:, 0]<upper_1baseline) &(Spectra_short[:, 0]>lower_1baseline))]

    # Calculates the LH baseline
    LH_baseline=Spectra_short[
    ((Spectra_short[:, 0]<upper_0baseline) &(Spectra_short[:, 0]>lower_0baseline))]

    Mean_LH_baseline=np.nanmean(LH_baseline[:, 1])
    Std_LH_baseline=np.nanstd(LH_baseline[:, 1])

    # Calculates the RH baseline
    RH_baseline=Spectra_short[((Spectra_short[:, 0]<upper_1baseline)
    &(Spectra_short[:, 0]>lower_1baseline))]
    Mean_RH_baseline=np.nanmean(RH_baseline[:, 1])
    Std_RH_baseline=np.nanstd(RH_baseline[:, 1])

    # Removes points outside baseline

    LH_baseline_filt=LH_baseline[(LH_baseline[:, 1]<Mean_LH_baseline+config.outlier_sigma*Std_LH_baseline)
    &(LH_baseline[:, 1]>Mean_LH_baseline-config.outlier_sigma*Std_LH_baseline) ]

    RH_baseline_filt=RH_baseline[(RH_baseline[:, 1]<Mean_RH_baseline+config.outlier_sigma*Std_RH_baseline)
    &(RH_baseline[:, 1]>Mean_RH_baseline-config.outlier_sigma*Std_RH_baseline) ]

    Baseline=np.concatenate((LH_baseline_filt, RH_baseline_filt), axis=0)





    # Fits a polynomial to the baseline of degree
    Pf_baseline = np.poly1d(np.polyfit(Baseline[:, 0], Baseline[:, 1], config.N_poly_carb_bck))
    Py_base =Pf_baseline(Spectra_short[:, 0])
    Py_base_fit =Pf_baseline(Spectra_fit[:, 0])



    Baseline_ysub=Pf_baseline(Baseline[:, 0])
    Baseline_x=Baseline[:, 0]
    y_corr= Spectra_short[:, 1]-Py_base
    y_corr_fit=Spectra_fit[:, 1]-Py_base_fit
    x_fit=Spectra_fit[:, 0]
    x_corr=Spectra_short[:, 0]



    if config.model_name not in ['GaussianModel', 'VoigtModel', 'PseudoVoigtModel', 'Spline']:
        raise TypeError('model_name not a permitted input, Please select either GaussianModel, VoigtModel, PseudoVoigtModel, or Spline')


    if config.model_name in ['GaussianModel', 'VoigtModel', 'PseudoVoigtModel']:


        # NOw into the voigt fitting
        if config.model_name=='VoigtModel':
            model0 = VoigtModel()#+ ConstantModel()
        if config.model_name == 'PseudoVoigtModel':
            model0 = PseudoVoigtModel()
        if config.model_name=='GaussianModel':
            model0=GaussianModel()

        # create parameters with initial values
        pars0 = model0.make_params()
        pars0['center'].set(config.cent, min=config.cent-30, max=config.cent+30)
        pars0['amplitude'].set(config.amplitude, min=0)


        init0 = model0.eval(pars0, x=x)
        result0 = model0.fit(y_corr_fit, pars0, x=x)
        Center_p0=result0.best_values.get('center')
        area_p0=result0.best_values.get('amplitude')



        # Make a nice linspace for plotting with smooth curves.
        xx_carb=np.linspace(min(x), max(x), 2000)
        y_carb=result0.eval(x=xx_carb)
        height=np.max(y_carb)

        # Moved this form down below, think it belongs here
        if area_p0 is None:
            area_p0=np.nan
            if area_p0 is not None:
                if area_p0<0:
                    area_p0=np.nan


        df=pd.DataFrame(data={'filename': filename,
    'Peak_Cent_{}'.format(name): Center_p0,
    'Peak_Area_{}'.format(name): area_p0,
    'Peak_Height_{}'.format(name): height,
    'Model_name': config.model_name}, index=[0])

    else:

        if  config.model_name == 'Spline':
            from scipy.interpolate import CubicSpline
            # fit a spline first to find intensity cut off
            mix_spline_sil = interp1d(x_fit, y_corr_fit,
                                    kind='cubic')


            x_new=np.linspace(np.min(x_fit), np.max(x_fit), 1000)
            Baseline_ysub_sil=mix_spline_sil(x_new)

            x_max = x_new[np.argmax(Baseline_ysub_sil)]
            cent=x_max
            Peak_Center=x_max

            max_y=np.max(y_corr_fit)

            # Lets trim x and y based on intensity values
        # Find intensity cut off

            y_int_cut=max_y*int_cut_off

            # Check peak isnt at edge of window, else wont work

            if Peak_Center==np.max(x_fit) or Peak_Center==np.min(x_fit):
                print('peak at edge of window, setting params to nans')



                df=pd.DataFrame(data={'filename': filename,
            'Peak_Cent_{}'.format(name): np.nan,
            'Peak_Area_{}'.format(name): np.nan,
            'Peak_Height_{}'.format(name): np.nan,
            'Model_name': config.model_name}, index=[0])

            else:



            # Split the array into a LHS and a RHS

                LHS_y=Baseline_ysub_sil[x_new<Peak_Center] # Was originally <=
                RHS_y=Baseline_ysub_sil[x_new>Peak_Center]

                LHS_x=x_new[x_new<Peak_Center] # Was originally <=
                RHS_x=x_new[x_new>Peak_Center]

                # Need to flip LHS to put into the find closest function
                LHS_y_flip=np.flip(LHS_y)
                LHS_x_flip=np.flip(LHS_x)




                val=np.argmax(LHS_y_flip<y_int_cut)
                if val == 0 and not (LHS_y_flip[0] < y_int_cut):
                    val = np.argmin(LHS_y_flip)

                val2=np.argmax(RHS_y<y_int_cut)
                if val2 == 0 and not (RHS_y[0] < y_int_cut):
                    val2 = np.argmin(RHS_y)



                # Find nearest x unit to this value
                y_nearest_LHS=LHS_y_flip[val]
                x_nearest_LHS=LHS_x_flip[val]

                y_nearest_RHS=RHS_y[val2]
                x_nearest_RHS=RHS_x[val2]




                diff_LHS=Peak_Center-x_nearest_LHS
                diff_RHS=x_nearest_RHS-Peak_Center



                n_res=0


                x_new_skewness=np.linspace(x_nearest_LHS-spec_res*n_res, x_nearest_RHS+spec_res*n_res)


                x_new=x_new_skewness


                Baseline_ysub_sil=mix_spline_sil(x_new_skewness)






                N_poly_sil='Spline'


                xspace_sil=x_new[1]-x_new[0]
                area_trap = trapezoid(Baseline_ysub_sil, dx=xspace_sil)
                area_simps = simpson(Baseline_ysub_sil, dx=xspace_sil)



                area_p0=area_trap




                df=pd.DataFrame(data={'filename': filename,
            'Peak_Cent_{}'.format(name): x_max,
            'Peak_Area_{}'.format(name): area_p0,
            'Peak_Height_{}'.format(name): max_y,
            'Model_name': config.model_name}, index=[0])






    # Plotting what its doing

    if plot_figure is True:
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,5))
        fig.suptitle('Secondary Phase, file= '+ str(filename), fontsize=12, x=0.5, y=1.0)

        # Plot the peak positions and heights



        ax1.set_title('Background fit')

        ax1.plot(Spectra_plot[:, 0], Spectra_plot[:, 1], '-r', label='Spectra')
        ax1.plot(RH_baseline_filt[:, 0], RH_baseline_filt[:, 1], '.b',
        label='bck points')
        ax1.plot(LH_baseline_filt[:, 0], LH_baseline_filt[:, 1], '-b',
        lw=3, label='_bck points')

        #ax2.plot(x, y_corr_fit, '-r', label='Bck-sub data', lw=0.5)
        ax2.plot(x, y_corr_fit, '.r', label='Bck-sub data')
        ax1.plot(Spectra_short[:, 0], Py_base, '-k', label='Bck Poly')


        if config.model_name != 'Spline' and config.model_name != 'Poly':
            ax2.plot(xx_carb, y_carb, '-k', label='Peak fit')


        else:
            ax2.plot(x_new, Baseline_ysub_sil, '-k')
            ax2.fill_between(x_new, Baseline_ysub_sil, color='yellow', label='fit area', alpha=0.5)


        cent=df['Peak_Cent_{}'.format(name)].iloc[0]
        ax2.set_xlim([cent-config.x_range_bck, cent+config.x_range_bck])

        ax2.set_title('Bkg-subtracted, ' + name + ' peak fit')


        ax1.set_ylabel('Intensity')
        ax1.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax2.set_ylabel('Bck-corrected Intensity')
        ax2.set_xlabel('Wavenumber (cm$^{-1}$)')

        ax1.legend()
        ax2.legend()

        ax1.set_ylim([
        np.min(Spectra_plot[:, 1])-50,
        np.max(Spectra_plot[:, 1])+50
        ])






        file=filename.rsplit('.txt', 1)[0]
        if plot_figure is True:
            path3=path+'/'+'Secondary_fit_Images'

            if os.path.exists(path3):
                out='path exists'
            else:
                os.makedirs(path3, exist_ok=False)

            file=filename.rsplit('.txt', 1)[0]
            fig.savefig(path3+'/'+ name +'_Fit_{}.png'.format(file), dpi=dpi)



    if config.return_other_params is True:
        return df, xx_carb, y_carb, result0
    else:
        return df



[docs]
def peak_prominence(x, y, peak_position):
    """ This function calculates the approximate prominence of a peak,
    by finding the local minimum either side of the peak

    Parameters
    ----------
    x: np.array
        x coordinates (wavenumber)
    y: np.array
        y coordinates (intensity)

    peak:position: int, float
        Peak position to calculate prominence for

    Returns
    --------
    float: prominence of peak in y coordinates.

    """
    # Find the index of the peak position
    peak_index = np.where(x == peak_position)[0][0]

    # Find the local minimums on both sides of the peak
    left_valley_index = peak_index
    right_valley_index = peak_index

    # Find the left valley
    while left_valley_index > 0 and y[left_valley_index] >= y[left_valley_index - 1]:
        left_valley_index -= 1

    # Find the right valley
    while right_valley_index < len(x) - 1 and y[right_valley_index] >= y[right_valley_index + 1]:
        right_valley_index += 1

    # Calculate prominence as the difference between peak height and higher valley
    peak_height = y[peak_index]
    left_valley_height = y[left_valley_index]
    right_valley_height = y[right_valley_index]

    prominence = peak_height - max(left_valley_height, right_valley_height)

    return prominence




# Plotting up secondary peaks

from scipy.signal import find_peaks

[docs]
def plot_secondary_peaks(*, Diad_Files, path, filetype,
        xlim_plot=[1040, 1200], xlim_peaks=[1060, 1100],
        height=100, threshold=0.1, distance=10, prominence=5, width=6,
         prominence_filter=False, sigma=3, sigma_window=30, find_peaks_filter=False,
         just_plot=False, yscale=0.2, gaussian_smooth=False, smoothing_window=5, smooth_std=3):

    """ This function plots spectra stacked vertically ontop of each other, in a specific region.
    It also has two options to identify peak positions.

    Parameters
    ---------------
    Diad_Files: list
        List of filenames to plot

    path: str
        Path where spectra are stored

    filetype: str
        Choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    xlim_plot: Tuple[float, float]
        Range of x coordinates to make plot over

    xlim_peaks: Tuple[float, float]
        Range of x coordinates to search for peaks in

    Optional parameters for Smoothing spectra prior to finding peaks

        gaussian_smooth:bool - If True, applies a gaussian smoothing filter using np.convolve defined by a
         smoothing_window and a smooth_std.


    If you want to actually ID peaks rather than just plot them, choose from:

    prominence_filter: bool
        if True, finds max y coordinate in the window xlim_peaks. It then calculates the prominence by
        comparing this peak height to the average of the median of the 3 datapoints at the left and right of the window.
        If the peak is more than 'prominence' units above the average of these two end point medians, it
        is classified as a peak.
        Also need to specify prominence: float

    find_peaks_filter: bool

        if True, uses scipy find peaks to find peaks. Can tweak distance, prominence, width,
        threshold, and height.

    just_plot: bool
        if True, just plots spectra, doesnt try to find peaks

    y_scale: float
        Figure size is 2+y_scale * number of spectra

    Returns
    -------------
    df_peaks, x_data, y_data, fig

    df_peaks: pd.DataFrame for each file of peak position, height, prominence and file name. Filled with Nans if didnt find a peak
    x_data: np.array of x coordinates (Assumes the same for all files)
    y_data: np.array of y coordinates of each spectra
    fig: figure it has plotted


    """
    if prominence_filter is True and (find_peaks_filter is True or just_plot is True):
        raise TypeError('Only one of sigma_filter, ind_peaks_filter and just_plot can be True')
    fig, (ax1) = plt.subplots(1, 1, figsize=(10,2+yscale*len(Diad_Files)))

    i=0
    Y=0
    peak_pos_saved=np.zeros(len(Diad_Files), dtype=float)
    peak_prom_saved=np.zeros(len(Diad_Files), dtype=float)
    peak_height_saved=np.zeros(len(Diad_Files), dtype=float)
    peak_bck=np.zeros(len(Diad_Files), dtype=float)
    y_star=np.zeros(len(Diad_Files), dtype=float)
    yplot=np.zeros(len(Diad_Files), dtype=float)
    Diad_df=get_data(path=path, filename=Diad_Files[0], filetype=filetype)
    x_data=Diad_df[:, 0]
    y_data=np.zeros([  len(x_data), len(Diad_Files)], float)


    for file in Diad_Files:




        Diad_df=get_data(path=path, filename=file, filetype=filetype)
        Diad=np.array(Diad_df)


        # First lets use find peaks
        y2=Diad[:, 1]
        x=Diad[:, 0]

        if gaussian_smooth is True:
            y = np.convolve(y2, gaussian(smoothing_window, std=smooth_std), mode='same')
        else:
            y=y2



        # Region of interest
        Region_peaks = ((x>xlim_peaks[0])& (x<xlim_peaks[1]))
        Region_plot=((x>xlim_plot[0])& (x<xlim_plot[1]))
        x_trim=x[Region_peaks]
        y_trim=y[Region_peaks]
        x_plot=x[Region_plot]
        y_plot=y[Region_plot]
        y_data[:, i]=y



        # IF using scipy find peaks
        if find_peaks_filter is True:
            # Find peaks for trimmed spectra (e.g. in region peaks have been asked for)
            peaks = find_peaks(y_trim,height = height, threshold = threshold,
            distance = distance, prominence=prominence, width=width)


            height_PEAK = peaks[1]['peak_heights'] #list of the heights of the peaks
            peak_pos = x_trim[peaks[0]] #list of the peaks positions

            df_sort=pd.DataFrame(data={'pos': peak_pos,
                                'height': height_PEAK})

            df_peak_sort=df_sort.sort_values('height', axis=0, ascending=False)

            # Trim number of peaks based on user-defined N peaks



            #print(df_peak_sort_short)
            if len(df_peak_sort>=1):

                df_peak_sort_short=df_peak_sort[0:1]
                peak_pos_saved[i]=df_peak_sort_short['pos'].values[0]
                peak_height_saved[i]=df_peak_sort_short['height'].values[0]
            else:

                peak_pos_saved[i]=np.nan
                peak_height_saved[i]=np.nan

            peak_bck[i]=np.quantile(y_plot, 0.2)
            av_y=np.quantile(y_plot, 0.5)
            Diff=np.max(y_plot)-np.min(y_plot)
            Y_sum=np.max(y_plot)/Diff

            ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '-r')
            ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '.k', ms=1)
            if len(height_PEAK)>0:


                ax1.plot(df_peak_sort_short['pos'], (df_peak_sort_short['height']-np.min(y_plot))/Diff+i, '*k', mfc='yellow', ms=10)

            yplot[i]=np.min(((y_plot-np.min(y_plot))/Diff)+i)

            ax1.annotate(str(file), xy=(xlim_plot[1]+0.5, yplot[i]),
                         xycoords="data", fontsize=8)
            #ax1.set_xlim(xlim)
            Y=Y+Y_sum


            # calculate prominence
            y_edge1=np.median(y_trim[0:3])
            y_edge2=np.median(y_trim[-3:])
            peak_prom_saved[i]=peak_height_saved[i]-(y_edge1+y_edge2)/2

            i=i+1


        elif prominence_filter is True:
                # Find max value in region

            maxy=np.max(y_trim)
            xpos=x_trim[y_trim==maxy][0]
            #print(xpos)
            # if (xpos+sigma_window)>np.max(x_trim):
            #     Raise TypeError('Your peak finding window is smaller than sigma window')
            # y_around_max=y_trim[(x_trim>(xpos+sigma_window))&(x_trim<(xpos-sigma_window))]
            # stdy=np.nanstd(y_around_max)
            # mediany=np.quantile(y_around_max, 0.5)
            # if maxy>mediany+stdy*sigma+prominence:
            #     pos_x=x_trim[y_trim==maxy][0]
            # else:
            #     pos_x=np.nan

            #prominence_calc = peak_prominence(x_trim, y_trim, xpos)

            y_edge1=np.median(y_trim[0:3])
            y_edge2=np.median(y_trim[-3:])
            #print(y_edge2)
            prominence_calc=maxy-(y_edge1+y_edge2)/2
            peak_prom_saved[i]=prominence_calc



            if prominence_calc>prominence:
                pos_x=x_trim[y_trim==maxy][0]
            else:
                pos_x=np.nan




            if pos_x>0:
                peak_pos_saved[i]=pos_x
                peak_height_saved[i]=maxy

            else:
                peak_pos_saved[i]=np.nan
                peak_height_saved[i]=np.nan

            peak_bck[i]=np.quantile(y_plot, 0.2)
            av_y=np.quantile(y_plot, 0.5)
            Diff=np.max(y_plot)-np.min(y_plot)
            Y_sum=np.max(y_plot)/Diff
            # IF there is something that passes the filter
            if pos_x>0:
                y_star[i]=(peak_height_saved[i]-np.min(y_plot))/Diff+i
                ax1.plot(pos_x, y_star[i], '*k', mfc='yellow', ms=10)
            else:
                y_star[i]=np.nan

            ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '-r')
            ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '.k', ms=1)
            yplot[i]=np.min(((y_plot-np.min(y_plot))/Diff)+i)
            ax1.annotate(str(file), xy=(xlim_plot[1]+0.5, yplot[i]),
                         xycoords="data", fontsize=8)
            Y=Y+Y_sum
            i=i+1

        elif just_plot is True:
            Diff=np.max(y_plot)-np.min(y_plot)
            Y_sum=np.max(y_plot)/Diff
            ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '-r')
            ax1.plot(x_plot, ((y_plot-np.min(y_plot))/Diff)+i, '.k', ms=1)
            yplot[i]=np.min(((y_plot-np.min(y_plot))/Diff)+i)
            ax1.annotate(str(file), xy=(xlim_plot[1]+0.5, yplot[i]),
                         xycoords="data", fontsize=8)
            Y=Y+Y_sum
            i=i+1



    df_peaks=pd.DataFrame(data={'pos': peak_pos_saved,
                                        'height': peak_height_saved,
                                            'prom': peak_prom_saved})

    #if sigma_filter is True:
        #ax1.plot(df_peaks['pos'][y_star>0], y_star[y_star>0], '*k', mfc='yellow', ms=10)










    ax1.plot([1078, 1078], [0, yplot[-1]+2], '-g', lw=1, label='Na$_2$CO$_3$')
    ax1.plot([1085, 1085], [0, yplot[-1]+2], '-', color='cornflowerblue', lw=1, label='CaCO$_3$')

    ax1.plot([1094, 1094], [0, yplot[-1]+2], '-c', lw=1, label='MgCO$_3$')
    ax1.plot([1097, 1097], [0, yplot[-1]+2], '-b', lw=1, label='Dolomite')
    ax1.plot([1131, 1131], [0, yplot[-1]+2], '-', color='grey', lw=1, label='CaSO$_4$')
    ax1.plot([1136, 1136], [0, yplot[-1]+2], '--', color='rosybrown', lw=1, label='MgSO$_4$')
    ax1.plot([1151, 1151], [0, yplot[-1]+2], '-k', lw=1, label='SO$_2$')


    ax1.fill_between(xlim_peaks, -0.5, yplot[-1]+3, color='blue', alpha=0.1)


    ax1.legend(ncol=7,loc='upper center', fontsize=10,  bbox_to_anchor=[0.5, 1.05])
    ax1.set_ylim([-0.5, yplot[-1]+3])
    ax1.set_xlim([xlim_plot[0], xlim_plot[1]+0.5])

    # df_peaks=pd.DataFrame(data={'filename': Diad_Files,
    #                         'pos': peak_pos_saved,
    #                             'prom': peak_height_saved-peak_bck})



    ax1.set_xlabel('Wavenumber (cm$^{-1}$)')
    ax1.set_yticks([])

    x_data=x_trim

    df_peaks['file_names']=Diad_Files

    return df_peaks, x_data, y_data, fig



## Diad Skewness from DeVitre et al.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
# New things needed here
from scipy.interpolate import interp1d
from scipy.signal import find_peaks
import os
from os import listdir
from os.path import isfile, join
from DiadFit.importing_data_files import *
from DiadFit.diads import *

np.seterr (invalid='raise')

encode="ISO-8859-1"







[docs]
def assess_diad1_skewness(*,  config1: diad1_fit_config=diad1_fit_config(), int_cut_off=0.3, path=None, filename=None, filetype=None,
skewness='abs', height=1, prominence=5, width=0.5, plot_figure=True, peak_fit_routine=False, peak_pos=None, peak_height=None, dpi=200, diad_array=None):
    """ Assesses Skewness of Diad peaks. Useful for identifying mixed L + V phases
    (see DeVitre et al. in review)


    Parameters
    -----------
    config1: diad1_fit_config() dataclass. Import parameters for this function are.

        lower_bck_diad1, upper_bck_diad1: [float, float]
            background position to left and right of overal diad region (inc diad+-HB+-C13)

        N_poly_bck_diad1: int
            degree of polynomial to fit to the background.

        exclude_range1, exclude_range2: Tuple[float, float]
            Range to exclude

    int_cut_off: float
        Value of intensity at which to calculate the skewness (e.g. 0.15X the peak height).

    path: str
        Folder user wishes to read data from

    filename: str
        Specific file being read


    filetype: str
        Identifies type of file
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    skewness: 'abs' or 'dir'
        If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction)
        if 'dir' does skewness as one side over the other


    height, prominence, width: float
        Scipy find peak parameters

    plot_figure: bool
        if True, plots and saves a figure.

    dpi: int
        dpi to save image at

    peak_fit_routine: bool (default False) -
        if True, doesnt find max of spline, but uses the peak position you have found from the diad fitting functions.
        Also needs peak_pos (peak center in cm-1) and peak_height


    Returns
    -----------
    pd.DataFrame with information about file and skewness parameters.

    """



    y_corr_diad1, Py_base_diad1, x_diad1,  Diad_short, Py_base_diad1, Pf_baseline,  Baseline_ysub_diad1, Baseline_x_diad1, Baseline, span=remove_diad_baseline(
    path=path, filename=filename, filetype=filetype, N_poly=config1.N_poly_bck_diad1,
    lower_bck=config1.lower_bck_diad1, upper_bck=config1.upper_bck_diad1, plot_figure=False, diad_array=diad_array)



    x_lin_baseline=np.linspace(config1.lower_bck_diad1[0], config1.upper_bck_diad1[1], 100000)
    ybase_xlin=Pf_baseline(x_lin_baseline)



    # Get x and y ready to make a cubic spline through data
    x=x_diad1
    y=y_corr_diad1

    # Fit a  cubic spline
    f2 = interp1d(x, y, kind='cubic')
    x_new=np.linspace(np.min(x), np.max(x), 100000)

    y_cub=f2(x_new)



    # Use Scipy find peaks to get the biggest peak
    if peak_fit_routine is False:
        height=height
        peaks = find_peaks(y_cub, height=height, prominence=prominence, width=width)
        peak_height=peaks[1]['peak_heights']
        peak_pos = x_new[peaks[0]]


        # find max peak.  put into df because i'm lazy
        peak_df=pd.DataFrame(data={'pos': peak_pos,
                            'height': peak_height})

        # Find bigest peaks,
        df_peak_sort=peak_df.sort_values('height', axis=0, ascending=False)
        df_peak_sort_trim=df_peak_sort[0:1]
        Peak_Center=df_peak_sort_trim['pos']
        Peak_Height=df_peak_sort_trim['height']

    # now we are using peak parameters we got from Voigt/Pseudovoigt etc.
    if peak_fit_routine is True:
        Peak_Height=peak_height
        Peak_Center=peak_pos



    # Find intensity cut off
    y_int_cut=Peak_Height.iloc[0]*int_cut_off

    # Split the array into a LHS and a RHS

    LHS_y=y_cub[x_new<=Peak_Center.values]
    RHS_y=y_cub[x_new>Peak_Center.values]

    LHS_x=x_new[x_new<=Peak_Center.values]
    RHS_x=x_new[x_new>Peak_Center.values]

    # Need to flip LHS to put into the find closest function
    LHS_y_flip=np.flip(LHS_y)
    LHS_x_flip=np.flip(LHS_x)

    val=np.argmax(LHS_y_flip<y_int_cut)

    val2=np.argmax(RHS_y<y_int_cut)


    # Find nearest x unit to this value
    y_nearest_LHS=LHS_y_flip[val]
    x_nearest_LHS=LHS_x_flip[val]

    y_nearest_RHS=RHS_y[val2]
    x_nearest_RHS=RHS_x[val2]


    # Return Skewness
    LHS_Center=abs(x_nearest_LHS-Peak_Center)
    RHS_Center=abs(x_nearest_RHS-Peak_Center)


   #Also get option to always take biggest value of skewness
    if skewness=='abs':
        if LHS_Center.values>RHS_Center.values:
            AS=LHS_Center.values/RHS_Center.values
        else:
            AS=RHS_Center.values/LHS_Center.values
    elif skewness=='dir':
        AS=RHS_Center.values/LHS_Center.values


    if plot_figure is True:

        # Make pretty figure showing background subtractoin routine
        fig, ((ax2, ax1), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,10))
        fig.suptitle('Diad 1 Skewness file= '+ str(filename), fontsize=16, x=0.5, y=1.0)

        ax2.set_title('a) Spectra')
        ax2.plot(Diad_short[:, 0], Diad_short[:, 1], '-r', label='Data')
        ax2.set_xlim([config1.lower_bck_diad1[0]+40, config1.upper_bck_diad1[1]-40])
        ax1.set_title('b) Background fit')
        ax1.plot(Diad_short[:, 0], Diad_short[:, 1], '.-c', label='Data')

        ax1.plot(Baseline[:, 0], Baseline[:, 1], '.b', label='bck')
        ax1.plot(Diad_short[:, 0], Py_base_diad1, '-k', label='bck fit')
        ax3.set_title('c) Background subtracted')
        ax3.plot(x_diad1,y_corr_diad1, '-r', label='Bck sub')

    # Adds cubic interpoloation for inspection

        ax4.set_title('d) Cubic Spline')




        ax4.plot([x_nearest_LHS, Peak_Center.iloc[0]], [y_int_cut, y_int_cut], '-g')

        ax4.annotate(str(np.round(LHS_Center.values[0], 2)),
                     xy=(x_nearest_LHS-3, y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data",
                     fontsize=12, color='green')
        ax4.plot(x_nearest_LHS, y_nearest_LHS, '*k', mfc='green',ms=15, label='RH tie')



        ax4.plot([Peak_Center.iloc[0], x_nearest_RHS], [y_int_cut, y_int_cut], '-', color='grey')
        ax4.annotate(str(np.round(RHS_Center.values[0], 2)),
                     xy=(x_nearest_RHS+3, y_int_cut+(Peak_Height.iloc[0]-y_int_cut)/10), xycoords="data",
                      fontsize=12, color='grey')
        ax4.plot(x_nearest_RHS, y_nearest_RHS, '*k', mfc='grey', ms=15, label='LH tie')




        ax4.plot(x, y, '.r')
        ax4.plot(x_new, y_cub, '-k')

        ax4.plot([Peak_Center.iloc[0], Peak_Center.iloc[0]], [Peak_Height.iloc[0], Peak_Height.iloc[0]],
             '*k', mfc='blue', ms=15, label='Scipy Center')


        # Add to plot

        ax4.plot(config1.lower_bck_diad1[0], config1.upper_bck_diad1[1], [y_int_cut, y_int_cut], ':r')




        ax4.set_xlim([x_nearest_LHS-10, x_nearest_RHS+10])
        ax4.set_ylim([0-10, Peak_Height.iloc[0]*1.2])
        ax4.plot([Peak_Center.iloc[0], Peak_Center.iloc[0]], [0, Peak_Height.iloc[0]], ':b')



        ax4.annotate('Skewness='+str(np.round(AS[0], 2)),
                     xy=(x_nearest_RHS+2, y_int_cut+y_int_cut+(Peak_Height.iloc[0]-y_int_cut)/10), xycoords="data",
                      fontsize=12)

        ax2.legend()
        ax1.legend()
        ax3.legend()
        ax4.legend()
        ax1.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax2.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax3.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax4.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax1.set_ylabel('Intensity')
        ax2.set_ylabel('Intensity')
        ax3.set_ylabel('Intensity')
        ax4.set_ylabel('Intensity')
        fig.tight_layout()



        path3=path+'/'+'Skewness_images'
        if os.path.exists(path3):
            out='path exists'
        else:
            os.makedirs(path+'/'+ 'Skewness_images', exist_ok=False)


        file=filename.rsplit('.txt', 1)[0]
        fig.savefig(path3+'/'+'Diad1_skewness_{}.png'.format(file), dpi=dpi)

    df_out=pd.DataFrame(data={'filename':filename,
                              'Skewness_diad1': AS,
                              'LHS_tie_diad1': x_nearest_LHS,
                              'RHS_tie_diad1': x_nearest_RHS})

    return df_out





[docs]
def assess_diad2_skewness(*,  config1: diad2_fit_config=diad1_fit_config(), int_cut_off=0.3, path=None, filename=None, filetype=None,
skewness='abs', height=1, prominence=5, width=0.5, plot_figure=True, dpi=200, peak_fit_routine=False, peak_pos=None, peak_height=None, diad_array=None):

    """ Assesses Skewness of Diad peaks. Useful for identifying mixed L + V phases
    (see DeVitre et al. in review)


    Parameters
    -----------
    config1: diad2_fit_config() dataclass. Import parameters for this function are.

        lower_bck_diad2, upper_bck_diad2: [float, float]
            background position to left and right of overal diad region (inc diad+-HB+-C13)

        N_poly_bck_diad1: int
            degree of polynomial to fit to the background.

        exclude_range1, exclude_range2: Tuple[float, float]
            Range to exclude

    int_cut_off: float
        Value of intensity at which to calculate the skewness (e.g. 0.15X the peak height).

    path: str
        Folder user wishes to read data from

    filename: str
        Specific file being read


    filetype: str
        Identifies type of file
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    skewness: 'abs' or 'dir'
        If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction)
        if 'dir' does skewness as one side over the other


    height, prominence, width: float
        Scipy find peak parameters

    plot_figure: bool
        if True, plots and saves a figure.

    dpi: int
        dpi to save image at

    peak_fit_routine: bool (default False) -
        if True, doesnt find max of spline, but uses the peak position you have found from the diad fitting functions.
        Also needs peak_pos (peak center in cm-1) and peak_height



    Returns
    -----------
    pd.DataFrame with information about file and skewness parameters.

    """


# First, do the background subtraction
    y_corr_diad2, Py_base_diad2, x_diad2,  Diad_short, Py_base_diad2, Pf_baseline,  Baseline_ysub_diad2, Baseline_x_diad2, Baseline, span=remove_diad_baseline(
    path=path, filename=filename, filetype=filetype, N_poly=config1.N_poly_bck_diad2,
    lower_bck=config1.lower_bck_diad2, upper_bck=config1.upper_bck_diad2, plot_figure=False, diad_array=diad_array)



    x_lin_baseline=np.linspace(config1.lower_bck_diad2[0], config1.upper_bck_diad2[1], 100000)
    ybase_xlin=Pf_baseline(x_lin_baseline)



# Get x and y for cubic spline
    x=x_diad2
    y=y_corr_diad2

# Fits a  cubic spline
    f2 = interp1d(x, y, kind='cubic')
    x_new=np.linspace(np.min(x), np.max(x), 100000)

    y_cub=f2(x_new)


    if peak_fit_routine is False:
# Use Scipy find peaks to get that cubic peak
        peaks = find_peaks(y_cub, height=height, prominence=prominence, width=width)
        peak_height=peaks[1]['peak_heights']
        peak_pos = x_new[peaks[0]]

        # find max peak.  put into df because i'm lazy
        peak_df=pd.DataFrame(data={'pos': peak_pos,
                            'height': peak_height})

        df_peak_sort=peak_df.sort_values('height', axis=0, ascending=False)
        df_peak_sort_trim=df_peak_sort[0:1]
        Peak_Center=df_peak_sort_trim['pos'].iloc[0]
        Peak_Height=df_peak_sort_trim['height'].iloc[0]

    if peak_fit_routine is True:
        Peak_Height=peak_height[0]
        Peak_Center=peak_pos[0]




# Find intensity cut off
    y_int_cut=Peak_Height*int_cut_off

    # Split the array into a LHS and a RHS
    LHS_y=y_cub[x_new<=Peak_Center]
    RHS_y=y_cub[x_new>Peak_Center]

    LHS_x=x_new[x_new<=Peak_Center]
    RHS_x=x_new[x_new>Peak_Center]

    # Need to flip LHS to put into the find closest function
    LHS_y_flip=np.flip(LHS_y)
    LHS_x_flip=np.flip(LHS_x)

    val=np.argmax(LHS_y_flip<y_int_cut)

    val2=np.argmax(RHS_y<y_int_cut)


    # Find nearest x unit to this value
    y_nearest_LHS=LHS_y_flip[val]
    x_nearest_LHS=LHS_x_flip[val]

    y_nearest_RHS=RHS_y[val2]
    x_nearest_RHS=RHS_x[val2]

    # Return Skewness
    LHS_Center=abs(x_nearest_LHS-Peak_Center)
    RHS_Center=abs(x_nearest_RHS-Peak_Center)

    #Added conditional to always have a ratio of the high/low
    if skewness=='abs':
        if LHS_Center>RHS_Center:
            AS=LHS_Center/RHS_Center
        else:
            AS=RHS_Center/LHS_Center
    elif skewness=='dir':
        AS=RHS_Center/LHS_Center


    if plot_figure is True:

        # Make pretty figure showing background subtractoin routine
        fig, ((ax2, ax1), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10,10))
        fig.suptitle('Diad 2 Skewness file= '+ str(filename), fontsize=16, x=0.5, y=1.0)

        ax2.set_title('a) Spectra')
        ax2.plot(Diad_short[:, 0], Diad_short[:, 1], '-r', label='Data')
        ax2.set_xlim([config1.lower_bck_diad2[0]+40, config1.upper_bck_diad2[1]-40])
        ax1.set_title('b) Background fit')
        ax1.plot(Diad_short[:, 0], Diad_short[:, 1], '.-r', label='Data')

        ax1.plot(Baseline[:, 0], Baseline[:, 1], '.b', label='bck')
        ax1.plot(Diad_short[:, 0], Py_base_diad2, '-k', label='bck fit')
        ax3.set_title('c) Background subtracted')
        ax3.plot( x_diad2,y_corr_diad2, '-r', label='Bck sub')

    # Adds cubic interpoloation for inspection

        ax4.set_title('d) Cubic Spline')
        ax4.plot([x_nearest_LHS, Peak_Center], [y_int_cut, y_int_cut], '-g')
        ax4.annotate(str(np.round(LHS_Center, 2)),
                     xy=(x_nearest_LHS-3, y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data",
                     fontsize=12, color='green')
        ax4.plot(x_nearest_LHS, y_nearest_LHS, '*k', mfc='green',ms=15, label='RH tie')



        ax4.plot([Peak_Center, x_nearest_RHS], [y_int_cut, y_int_cut], '-', color='grey')
        ax4.annotate(str(np.round(RHS_Center, 2)),
                     xy=(x_nearest_RHS+3, y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data",
                      fontsize=12, color='grey')
        ax4.plot(x_nearest_RHS, y_nearest_RHS, '*k', mfc='grey', ms=15, label='LH tie')




        ax4.plot(x, y, '.r')
        ax4.plot(x_new, y_cub, '-k')

        ax4.plot([Peak_Center, Peak_Center], [Peak_Height, Peak_Height],
             '*k', mfc='blue', ms=15, label='Scipy Center')


        # Add to plot

        ax4.plot(config1.lower_bck_diad2[0], config1.upper_bck_diad2[1], [y_int_cut, y_int_cut], ':r')




        ax4.set_xlim([x_nearest_LHS-10, x_nearest_RHS+10])
        ax4.set_ylim([0-10, Peak_Height*1.2])
        ax4.plot([Peak_Center, Peak_Center], [0, Peak_Height], ':b')



        ax4.annotate('Skewness='+str(np.round(AS, 2)),
                     xy=(x_nearest_RHS+2, y_int_cut+y_int_cut+(Peak_Height-y_int_cut)/10), xycoords="data",
                      fontsize=10)

        ax2.legend()
        ax1.legend()
        ax3.legend()
        ax4.legend()
        ax1.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax2.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax3.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax4.set_xlabel('Wavenumber (cm$^{-1}$)')
        ax1.set_ylabel('Intensity')
        ax2.set_ylabel('Intensity')
        ax3.set_ylabel('Intensity')
        ax4.set_ylabel('Intensity')
        fig.tight_layout()


        path3=path+'/'+'Skewness_images'
        if os.path.exists(path3):
            out='path exists'
        else:
            os.makedirs(path+'/'+ 'Skewness_images', exist_ok=False)


        file=filename.rsplit('.txt', 1)[0]
        fig.savefig(path3+'/'+'Diad2_skewness_{}.png'.format(file), dpi=dpi)


    df_out=pd.DataFrame(data={
                              'Skewness_diad2': AS,
                              'LHS_tie_diad2': x_nearest_LHS,
                              'RHS_tie_diad2': x_nearest_RHS}, index=[0])

    return df_out




[docs]
def loop_diad_skewness(*, Diad_files, path=None, filetype=None,  skewness='abs', sort=False, int_cut_off=0.15,
config_diad1: diad1_fit_config=diad1_fit_config(), config_diad2: diad2_fit_config=diad2_fit_config(), prominence=10, width=1, height=1):
    """ Loops over all supplied files to calculate skewness for multiple spectra using the functions
    assess_diad1_skewness() and assess_diad2_skewness()


    Parameters
    -----------
    Diad_Files: list
        List of filenames to loop through

    path: str
        Folder user wishes to read data from

    filetype: str
        Identifies type of file
        choose from 'Witec_ASCII', 'headless_txt', 'headless_csv', 'head_csv', 'Witec_ASCII',
        'HORIBA_txt', 'Renishaw_txt'

    skewness: 'abs' or 'dir'
        If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction)
        if 'dir' does skewness as one side over the other



    sort: bool
        If true, sorts files alphabetically
    file_ext: str
        File format. Default txt, could also enter csv etc.
    exclude_str: str
        Excludes files with this string in their name. E.g. if exclude_str='Ne' it will exclude Ne lines
    exclude_type: str
        Excludes files of this type, e.g. exclude_type='png' gets rid of image files.


    lower_baseline_diad1, lower_baseline_diad2: list
        Region of spectra to use a baseline (LHS) for diad 1 and diad2

    upper_baseline_diad1, upper_baseline_diad2: list
        Region of spectra to use a baseline (RHS) for diad 1 and diad 2

    N_poly_bck_diad1, N_poly_bck_diad2: int
        Degree of polynomial for background fitting

    skewness: 'abs' or 'dir'
        If 'abs', gives absolute skewness (e.g., largest possible number regardless of direction)
        if 'dir' does skewness as one side over the other

    int_cut_off: float
        Value of intensity at which to calculate the skewness (e.g. 0.15X the peak height).


    Returns
    -----------
    pd.DataFrame wtih filename, skewness of Diad 1,
    the x position of the LH and RH tie point at intensity=int_int_cut_off



    """



    df_diad1 = pd.DataFrame([])
    df_diad2 = pd.DataFrame([])
    df_merged=pd.DataFrame([])
    for i in range(0, len(Diad_files)):

        filename=Diad_files[i]
        print('working on file #'+str(i))

        data_diad1=assess_diad1_skewness(config1=config_diad1,
        int_cut_off=int_cut_off, prominence=prominence, height=height, width=width,
        skewness=skewness, path=path, filename=filename,
        filetype=filetype)

        data_diad2=assess_diad2_skewness(config1=config_diad2,
        int_cut_off=int_cut_off, prominence=prominence, height=height, width=width,
        skewness=skewness, path=path, filename=filename,
        filetype=filetype)


        df_diad1 = pd.concat([df_diad1, data_diad1], axis=0)
        df_diad2 =  pd.concat([df_diad2, data_diad2], axis=0)



    df_combo=pd.concat([df_diad1, df_diad2], axis=1)

    cols_to_move=['filename', 'Skewness_diad1', 'Skewness_diad2']

    df_combo = df_combo[cols_to_move + [
        col for col in df_combo.columns if col not in cols_to_move]]




    return df_combo






## Give nice column names