import getStringsSimilarity from "utils/text/get_strings_similarity";
import getLongestCommonSubstring from "utils/text/get_longest_common_substring";

// Similarity ratio that must be strictly exceeded (`>`) for getSimilarityData
// to flag two clauses as similar on the ratio check alone.
const SIMILARITY_THRESHOLD = 0.85;
// Fallback check used when the ratio check does not pass: the longest common
// substring must be strictly longer (`>`) than this many characters for the
// clauses to still be flagged as similar.
const LCS_THRESHOLD = 50; // Longest Common Substring, number of chars

/**
 * Compares the `partial_text` of two clauses and reports whether they should
 * be treated as similar and/or identical.
 *
 * Decision order:
 *  1. If the similarity ratio is strictly above SIMILARITY_THRESHOLD, the
 *     clauses are similar. They are additionally identical only when the
 *     ratio is exactly 1 AND both clauses carry the same `reference`
 *     (e.g. "1.2.a Xxx" === "1.2.a Xxx").
 *  2. Otherwise, the clauses are still similar when their longest common
 *     substring is strictly longer than LCS_THRESHOLD characters; they are
 *     never identical on this path.
 *
 * @param {{partial_text: *, reference: *}} applicableClause - first clause.
 * @param {{partial_text: *, reference: *}} comparisonClause - second clause.
 * @returns {{similarityRatio: *, areClausesSimilar: boolean, areClausesIdentical: boolean}}
 *   `similarityRatio` is whatever getStringsSimilarity returned
 *   (presumably a number in [0, 1] — confirm against that helper).
 */
function getSimilarityData(applicableClause, comparisonClause) {
  const applicableText = applicableClause.partial_text;
  const comparisonText = comparisonClause.partial_text;
  const similarityRatio = getStringsSimilarity(applicableText, comparisonText);

  // Single place that shapes the result so every path returns the same keys.
  const toResult = (areClausesSimilar, areClausesIdentical) => ({
    similarityRatio,
    areClausesSimilar,
    areClausesIdentical,
  });

  if (similarityRatio > SIMILARITY_THRESHOLD) {
    const hasSameReference =
      applicableClause.reference === comparisonClause.reference;
    return toResult(true, similarityRatio === 1 && hasSameReference);
  }

  // The helper's result is replaced by "" when falsy, so `.length` is safe.
  const commonSubstring =
    getLongestCommonSubstring(applicableText, comparisonText) || "";

  return toResult(commonSubstring.length > LCS_THRESHOLD, false);
}

export default getSimilarityData;
