import {
  GraphControlTypeEnum,
  GraphDatasource,
  GraphDatasourceError,
  GraphSidebarControls,
  GraphSidebarDatasourceResponse,
  GraphTypeEnum,
} from '../graph-sidebar';
import { Alphabet, AlphabetName } from '@geneious/shared-constants/types';
import { SeriesColumnOptions } from 'highcharts';

// @ts-ignore
import { Alphabets } from '@geneious/shared-constants';
import { SequenceLogoSettings } from '../../report/report.model';
import { StackedColumnColorSchemeHandler } from './stacked-column-colorscheme-handler';

/**
 * Plot data for a sequence logo: the outer array holds one entry per sequence position
 * (column), and each entry lists the residue segments observed at that position.
 */
export type FrequencyPlotColumns = Segment[][];

/** A single residue's contribution to one column of the plot. */
export interface Segment {
  /** Residue symbol (one character of a sequence), e.g. `A`, `-` or `*`. */
  residue: string;
  /**
   * Magnitude of the residue at this position. While counts are being summed this is a raw
   * count; after conversion it is a percentage (0-100).
   */
  frequency: number;
  /**
   * Bits value of the residue at this position. Only set on entropy data, where it is kept
   * alongside `frequency` for tooltips and preferred over `frequency` when plotting.
   */
  bits?: number;
}

/**
 * Datasource for the sequence logo graph: a stacked column chart with one column per sequence
 * position and one stack segment per residue. The columns can be plotted either by residue
 * frequency or by information content in bits ("Entropy").
 *
 * Both representations are computed once in the constructor; switching between them afterwards
 * only re-formats the cached data.
 */
export class SequenceLogoDatasource implements GraphDatasource {
  /** Upper bound of the y-axis in entropy mode: 2 bits for nucleotides, 4.32 bits otherwise. */
  entropyMax: number;
  /** Series most recently produced by `getData`, before colors are applied. */
  processedData: SeriesColumnOptions[];
  /** True when the raw data carries a 'Cluster Contents (Top 100)' column. Only set for unformatted data. */
  top100Only: boolean;
  /** True when the raw cluster contents hold counts rather than percentages. Only set for unformatted data. */
  byCount: boolean;

  /** Validation message set by `processSequences`; takes precedence over any graph output. */
  private message: string | undefined;
  /** Per-position residue percentages with ambiguous residues redistributed. */
  private readonly preprocessedData: FrequencyPlotColumns;
  /** Per-position information content derived from `preprocessedData`. */
  private readonly entropyData: FrequencyPlotColumns;
  private colorSchemeHandler: StackedColumnColorSchemeHandler;

  /**
   * @param numberOfSequences Number of sequences used for the small-sample error correction.
   * @param data Either a list of `{ sequence, count, type }` records (when `isDataFormatted`)
   *   or a record containing a raw 'Cluster Contents' / 'Cluster Contents (Top 100)' string.
   * @param isDataFormatted Whether `data` is already a list of sequence records.
   * @param sequenceType Sequence type handed to the color scheme handler.
   * @param initialOptions Optional persisted settings used to seed the sidebar controls.
   */
  constructor(
    private numberOfSequences: number,
    private data: any,
    private isDataFormatted: boolean,
    sequenceType: string,
    private initialOptions?: SequenceLogoSettings,
  ) {
    this.colorSchemeHandler = new StackedColumnColorSchemeHandler(
      sequenceType,
      initialOptions?.colorScheme,
    );
    this.entropyMax = this.colorSchemeHandler.sequenceType === 'Nucleotide' ? 2 : 4.32;

    let sequences = this.data;
    if (!this.isDataFormatted) {
      // TODO Is this still needed? I think really old documents had only "Cluster Contents" and it was percentages
      // so therefore if you have the "Cluster Contents %" column the "Cluster Contents" must be count based.
      this.byCount =
        (this.data['Cluster Contents %'] || this.data['Cluster Contents % (Top 100)']) != null;
      this.top100Only = this.data['Cluster Contents (Top 100)'] != null;
      const clusterContents =
        this.data['Cluster Contents'] || this.data['Cluster Contents (Top 100)'];
      sequences = this.extractSequencesAndCounts(clusterContents);
    }

    // Both input shapes share the same pipeline once they are a list of sequence records.
    this.preprocessedData = this.processSequences(sequences);
    this.entropyData = SequenceLogoDatasource.convertToEntropy(
      this.preprocessedData,
      this.numberOfSequences,
      this.colorSchemeHandler.sequenceType,
    );
  }

  /**
   * Validation hook of the datasource contract. This datasource reports problems through the
   * `error` field of its response instead, so this always returns `null`.
   */
  validate(): GraphDatasourceError | null {
    return null;
  }

  /**
   * Produces the initial graph response, seeding the options from `initialOptions` where given
   * (labels and legend default to visible, plotting defaults to 'Frequency').
   */
  init(): Promise<GraphSidebarDatasourceResponse> {
    const validation = this.validate();
    if (validation) {
      return Promise.resolve(validation);
    }
    const options = {
      plotBy: this.initialOptions?.isSetToEntropy ? 'Entropy' : 'Frequency',
      colorScheme: this.colorSchemeHandler.colorSchemeName,
      showLabels: this.initialOptions?.showLabels ?? true,
      showLegend: this.initialOptions?.showLegend ?? true,
    };
    return this.getData(options);
  }

  /**
   * Rebuilds the graph response after a sidebar control changed. Animations are only requested
   * when the plot mode itself changed.
   */
  controlValueChanged(
    previousOptions: SequenceLogoSidebarOptions,
    options: SequenceLogoSidebarOptions,
  ): Promise<GraphSidebarDatasourceResponse> {
    this.colorSchemeHandler.colorSchemeName = options.colorScheme;
    return this.getData(options, previousOptions.plotBy !== options.plotBy);
  }

  /** Selects the cached data set for the requested plot mode and wraps it in a response. */
  private getData(
    options: SequenceLogoSidebarOptions,
    animations = true,
  ): Promise<GraphSidebarDatasourceResponse> {
    const source = this.isEntropy(options) ? this.entropyData : this.preprocessedData;
    this.processedData = this.formatGraphData(source);

    return Promise.resolve(this.formatData(this.processedData, options, animations));
  }

  /**
   * Builds the sidebar response: an error plus controls when there is a message to show,
   * otherwise the full graph description, controls and the options to persist.
   */
  private formatData(
    data: SeriesColumnOptions[],
    options: SequenceLogoSidebarOptions,
    animations: boolean,
  ): GraphSidebarDatasourceResponse {
    const isEntropy = this.isEntropy(options);
    const message = this.errorMessage(isEntropy);
    const controls = this.generateControls(options);
    if (message) {
      return { error: message, controls };
    }

    return {
      graph: {
        data: this.colorSchemeHandler.setColors(data),
        title: isEntropy ? 'Entropy by Position' : 'Frequency by Position',
        xAxisTitle: 'Position',
        yAxisTitle: isEntropy ? 'Bits' : 'Frequency',
        yAxisRange: isEntropy ? { min: 0, max: this.entropyMax } : undefined,
        stacking: isEntropy ? 'normal' : 'percent',
        showLabels: options.showLabels,
        showLegend: options.showLegend,
        animations: animations,
        type: GraphTypeEnum.STACKED_COLUMN,
      },
      controls,
      options: {
        colorScheme: this.colorSchemeHandler.colorSchemeName,
        isSetToEntropy: isEntropy,
      },
    };
  }

  /** Returns the 'Plot by' select followed by the color scheme handler's style controls. */
  private generateControls(options: SequenceLogoSidebarOptions): GraphSidebarControls {
    return [
      {
        name: 'plotBy',
        label: 'Plot by',
        type: GraphControlTypeEnum.SELECT,
        defaultOption: this.isEntropy(options) ? 'Entropy' : 'Frequency',
        options: [
          {
            displayName: 'Frequency',
            value: 'Frequency',
          },
          {
            displayName: 'Entropy',
            value: 'Entropy',
          },
        ],
      },
      ...this.colorSchemeHandler.getStyleControls(options),
    ];
  }

  private isEntropy(options: SequenceLogoSidebarOptions): boolean {
    return options.plotBy === 'Entropy';
  }

  /** True when plotting entropy and every point of every series in `processedData` is zero. */
  private allColumnsAreZero(isEntropy: boolean) {
    return (
      isEntropy &&
      this.processedData.every((residueData) =>
        (residueData.data as [number, number][]).every(([, value]) => value === 0),
      )
    );
  }

  /** Returns the message to show instead of a graph, if any. Validation messages win. */
  private errorMessage(isEntropy: boolean): string | undefined {
    const smallSample = this.allColumnsAreZero(isEntropy)
      ? 'All positions have an entropy of 0 bits due to the small-sample correction'
      : undefined;
    return this.message ?? smallSample;
  }

  /**
   * Turns sequence records into per-position residue percentages.
   *
   * Sets `message` and returns an empty result when there are fewer than two records or the
   * sequences differ in length; clears `message` otherwise.
   */
  private processSequences(
    data: { sequence: string; count: number; type: string }[],
  ): FrequencyPlotColumns {
    if (data.length < 2) {
      this.message = 'Sequence logo requires at least two sequences.';
      return [];
    }

    const length = data[0].sequence.length;
    if (!data.every(({ sequence }) => sequence.length === length)) {
      this.message = 'Sequences must be of the same length.';
      return [];
    }

    this.message = undefined;
    const summed = this.sumCountsByPosition(data, length);
    const sorted = this.sortPositionsByCount(summed);
    const totalCount = data.reduce((sum, { count }) => sum + count, 0);
    const percentages = this.convertToPercentage(sorted, totalCount);
    // The alphabet is taken from the first record.
    return SequenceLogoDatasource.removeAmbiguous(
      percentages,
      data[0].type as 'Nucleotide' | 'AminoAcid',
    );
  }

  /** Sums, for each of the first `length` positions, the counts of every residue seen there. */
  private sumCountsByPosition(
    data: { sequence: string; count: number }[],
    length: number,
  ): FrequencyPlotColumns {
    const columns: FrequencyPlotColumns = [];

    for (let i = 0; i < length; i++) {
      const totals = new Map<string, number>();

      data.forEach(({ sequence, count }) => {
        const residue = sequence[i];
        totals.set(residue, count + (totals.get(residue) || 0));
      });

      columns.push(Array.from(totals, ([residue, frequency]) => ({ residue, frequency })));
    }

    return columns;
  }

  /** Returns a copy of each column ordered from the most to the least frequent residue. */
  private sortPositionsByCount(data: FrequencyPlotColumns): FrequencyPlotColumns {
    return data.map((position) => [...position].sort((a, b) => b.frequency - a.frequency));
  }

  /** Converts summed counts into percentages of `total`. */
  private convertToPercentage(counts: FrequencyPlotColumns, total: number) {
    return counts.map((position) =>
      position.map((value) => ({
        ...value,
        // Multiply before dividing; scaling in place with *= causes rounding errors in the
        // entropy calculation.
        frequency: (value.frequency * 100) / total,
      })),
    );
  }

  /**
   * Pivots per-position segments into one column series per residue. Series appear in the
   * order their residue is first encountered; x values are 1-based positions and y values are
   * the segment's bits when present, its frequency otherwise.
   */
  private formatGraphData(data: FrequencyPlotColumns): SeriesColumnOptions[] {
    const pointsByResidue = new Map<string, [number, number][]>();

    data.forEach((segments, columnIndex) => {
      segments.forEach((segment) => {
        const point: [number, number] = [columnIndex + 1, segment.bits ?? segment.frequency];
        const points = pointsByResidue.get(segment.residue);
        if (points) {
          points.push(point);
        } else {
          pointsByResidue.set(segment.residue, [point]);
        }
      });
    });

    return Array.from(
      pointsByResidue,
      ([name, points]): SeriesColumnOptions => ({ name, data: points, type: 'column' }),
    );
  }

  // Example input: ARIHSLSSSSLGH (63) ARIHSLGSSSLGH (4) ARIHSRSSSSLGH (2) ARIRSLSSSSLGH (2) ARIHSLSRSSLGH (1) ARIHSLSSCSLGH (1)
  // ARIHSLSSSSLEH (1) ARIHSLSSSSLGP (1) ARIHSLSSSSLGR (1) ARIHSPSSSSLGH (1) ARINSLSSSSLGH (1) ARNHSLSSSSLGH (1) ARVHSLSSSSLGH (1)
  /**
   * Parses a raw cluster contents string into sequence records. Entries are expected as
   * `SEQUENCE (count)` when `byCount` is set and `SEQUENCE (percent%)` otherwise; anything that
   * does not match is ignored.
   */
  private extractSequencesAndCounts(
    raw: string,
  ): { sequence: string; count: number; type: string }[] {
    const counts = /([A-Z*]+) \(([0-9]+)\)/g;
    // The decimal point is escaped so that only a literal '.' can separate the fraction.
    const percents = /([A-Z*]+) \(([0-9][0-9]?[0-9]?(\.[0-9]+)?)%\)/g;
    const regExp = this.byCount ? counts : percents;
    const result: { sequence: string; count: number; type: string }[] = [];

    let match: RegExpExecArray | null;
    while ((match = regExp.exec(raw)) !== null) {
      const parsedCount = Number(match[2]);
      // Counts should never be 0, but if they are, this wreaks havoc with entropy calculation and causes a fatal NaN% error.
      // TODO This is a fix for old documents (November 2018 or earlier) where the 'Cluster Contents' has percentages, some values are 0.0% and come through to here as counts of 0. Change adjustment to 1 once support for these is removed.
      const adjustedCount = parsedCount === 0 ? 0.01 : parsedCount;

      result.push({
        sequence: match[1],
        count: adjustedCount,
        // Clustering can only be performed on amino acids.
        type: 'AminoAcid',
      });
    }

    return result;
  }

  /**
   * Redistributes frequencies of ambiguous residues to their corresponding canonical residues.
   *
   * For nucleotides, U is treated as T while redistributing and converted back to U afterwards
   * when the input contained U but no T.
   */
  static removeAmbiguous(
    columns: FrequencyPlotColumns,
    type: 'Nucleotide' | 'AminoAcid',
  ): FrequencyPlotColumns {
    let rna = false;
    let dna = false;

    // Normalize U to T and work out if data contains DNA, RNA or both.
    const normalized = columns.map((column) =>
      column.map(({ residue, frequency }) => {
        if (type === 'Nucleotide') {
          if (residue === 'T') {
            dna = true;
          } else if (residue === 'U') {
            rna = true;
            residue = 'T';
          }
        }

        return { residue, frequency };
      }),
    );

    const result = SequenceLogoDatasource._removeAmbiguous(normalized, type);

    // Convert T back to U if input was RNA.
    const restoreU = rna && !dna;
    return result.map((column) =>
      column.map(({ residue, frequency }) => ({
        residue: restoreU && residue === 'T' ? 'U' : residue,
        frequency,
      })),
    );
  }

  /**
   * Splits the frequency of every residue listed in the alphabet's ambiguity table evenly
   * across the residues it stands for, and merges segments that end up on the same residue.
   */
  private static _removeAmbiguous(
    columns: FrequencyPlotColumns,
    type: 'Nucleotide' | 'AminoAcid',
  ): FrequencyPlotColumns {
    const alphabet: Alphabet = Alphabets[type];

    return columns.map((column) => {
      const totals = new Map<string, number>();
      const add = (residue: string, frequency: number) => {
        totals.set(residue, frequency + (totals.get(residue) || 0));
      };

      column.forEach((segment) => {
        const residues = alphabet.ambiguities[segment.residue];

        if (residues) {
          residues.split('').forEach((residue) => {
            add(residue, segment.frequency / residues.length);
          });
        } else {
          add(segment.residue, segment.frequency);
        }
      });

      return Array.from(totals, ([residue, frequency]) => ({ residue, frequency }));
    });
  }

  /**
   * Wrapper method to convert data from frequencies to entropies.
   *
   * @param columns Per-position residue percentages.
   * @param sequences Number of sequences, used for the small-sample error correction.
   * @param type Name of the alphabet to look up in `Alphabets`.
   * @returns New columns without gap segments, each segment carrying `bits` and a rescaled
   *   `frequency`. The input is not modified.
   */
  static convertToEntropy(
    columns: FrequencyPlotColumns,
    sequences: number,
    type: string,
  ): FrequencyPlotColumns {
    const { output, gapPercentages } = SequenceLogoDatasource.redistributeGaps(columns);
    const { errorCorrections, information } = SequenceLogoDatasource.calculateEntropy(
      output,
      sequences,
      type,
    );
    return SequenceLogoDatasource.applyEntropy(
      output,
      type,
      gapPercentages,
      errorCorrections,
      information,
    );
  }

  /**
   * Removes any gaps in all positions and redistributes their percentage frequency to all other residues.
   *
   * @returns The gap-free copy of the input plus, for every position that had a gap, the gap's
   *   share as a fraction (0-1). Positions without a gap have no entry in `gapPercentages`.
   */
  private static redistributeGaps(input: FrequencyPlotColumns) {
    // Work on a copy so the caller's data is left untouched.
    const output = SequenceLogoDatasource.cloneFrequencyPlotColumns(input);

    // Gap percentages are saved for rescaling later.
    const gapPercentages: number[] = [];

    output.forEach((column, index) => {
      const gapIndex = column.findIndex((segment) => segment.residue === '-');
      if (gapIndex === -1) {
        return;
      }

      const gapPercentage = column[gapIndex].frequency;
      gapPercentages[index] = gapPercentage / 100;

      // Delete the gap; splice keeps the remaining segments contiguous.
      column.splice(gapIndex, 1);

      // Distribute the gap percentage evenly amongst all remaining residues.
      const share = gapPercentage / column.length;
      column.forEach((segment) => {
        segment.frequency += share;
      });
    });

    return { output, gapPercentages };
  }

  /**
   * Calculates per-position data and error corrections.
   *
   * @see https://en.wikipedia.org/wiki/Sequence_logo#Logo_creation
   * @see https://drive.google.com/open?id=1xP1FxWKVP7WFWvxqzwPALU3JZ5ArNo5P
   */
  private static calculateEntropy(columns: FrequencyPlotColumns, sequences: number, type: string) {
    const information: number[] = [];
    const errorCorrections: number[] = [];

    columns.forEach((column, i) => {
      const result = SequenceLogoDatasource._calculateEntropy(column, sequences, type);

      errorCorrections[i] = result.error;
      information[i] = result.information;
    });

    return { information, errorCorrections };
  }

  /**
   * Calculates the information content (in bits) and the small-sample error correction for a
   * single position.
   */
  private static _calculateEntropy(segments: Segment[], sequenceCount: number, type: string) {
    const { log, log2 } = Math;
    const alphabet = Alphabets[type as AlphabetName].canonical;

    let entropy = 0;
    let diversity = alphabet.length;

    segments.forEach((segment) => {
      // Handle unexpected characters (e.g. amino acids 21 and 22, *, etc).
      // Any residues that have made it to this point are to be treated as first class citizens.
      // Diversity and error are to be calculated using the observed diversity at each position
      // and scaled down to canonical diversity later.
      if (!alphabet.includes(segment.residue)) {
        diversity++;
      }

      // Calculate Shannon entropy (Hi). A residue with zero frequency contributes nothing
      // (p * log2(p) tends to 0 as p tends to 0); evaluating it directly would give
      // 0 * -Infinity = NaN and poison the whole position.
      const decimal = segment.frequency / 100;
      if (decimal !== 0) {
        entropy -= decimal * log2(decimal);
      }
    });

    // Calculate the small-sample error correction (en).
    const error = (1 / log(2)) * ((diversity - 1) / (2 * sequenceCount));

    // Calculate the information content in bits (Ri).
    let information = log2(diversity) - (entropy + error);

    // "With this correction, the information content measured at various positions of an aligned set of random sequences
    // will vary above and below zero. On average, it should be zero outside a binding site. The information content inside
    // a site will rise above zero." - Sequence logo paper (Schneider, 1986; J. Mol. Biol.)
    // The error correction can push the overall information content below zero. For plotting purposes, we truncate to zero.
    if (information < 0) {
      information = 0;
    } else if (diversity > alphabet.length) {
      // Scale information content down to the canonical number of residues for equivalent y-axis heights.
      information = information * (log2(alphabet.length) / (log2(diversity) + error));
    }

    return { information, error };
  }

  /**
   * Applies previous entropy calculations to residue frequencies to determine final bit and percentage values per residue per position.
   *
   * @param input Gap-free per-position percentages; not modified.
   * @param type Name of the alphabet to look up in `Alphabets`.
   * @param gapPercentages Gap share (0-1) per position, where the position had a gap.
   * @param errorCorrections Small-sample error correction per position.
   * @param information Information content in bits per position.
   */
  private static applyEntropy(
    input: FrequencyPlotColumns,
    type: string,
    gapPercentages: number[],
    errorCorrections: number[],
    information: number[],
  ): FrequencyPlotColumns {
    return input.map((position, index) => {
      // Maximum information content of the canonical alphabet; constant across the column.
      const maxBits = Math.log2(Alphabets[type as AlphabetName].canonical.length);

      return position.map((segment) => {
        // Calculate the proportion of information content for the current residue.
        let bits = segment.frequency * (information[index] / 100);

        // Scale down by the percentage of gaps at the position.
        if (gapPercentages[index]) {
          bits *= 1 - gapPercentages[index];
        }

        return {
          ...segment,
          // Save the bits value for tooltips.
          bits,
          // Convert from bits to percentages for plotting.
          frequency: (bits / (maxBits + errorCorrections[index])) * 100,
        };
      });
    });
  }

  /** Copies the columns and every segment in them so the result can be mutated freely. */
  private static cloneFrequencyPlotColumns(columns: FrequencyPlotColumns): FrequencyPlotColumns {
    return columns.map((segments) => segments.map((segment) => ({ ...segment })));
  }
}

/** Current values of the sidebar controls for the sequence logo graph. */
interface SequenceLogoSidebarOptions {
  /** Plot mode; the 'Plot by' control offers 'Frequency' and 'Entropy'. */
  plotBy: string;
  /** Name of the color scheme, forwarded to the color scheme handler. */
  colorScheme: string;
  /** Passed through to the graph's `showLabels` setting. */
  showLabels: boolean;
  /** Passed through to the graph's `showLegend` setting. */
  showLegend: boolean;
}
