import DOMPurify from 'dompurify';
// List of ICANN top level domains. Includes punycodes
let tld = require('../Assets/tld.json');

// Marker looked for in the incoming string; when it is present only the text
// that immediately follows its first occurrence is tested.
const EDITED_MESSAGE_MARKER = '<i class="messageArrow"/>';

// Matches an anchor tag that carries an href attribute and a text body.
// NOTE(review): inside both character classes `.-_` is parsed as a RANGE (from
// "." to "_"), which is what lets "/", "?" and ";" through. The pattern is left
// untouched on purpose; confirm the intended character set before tightening it.
const ANCHOR_TAG_REGEX = /<a\s+(?:[^>]*?\s+)?href\s*=\s*["'`]([-a-zA-Z0-9@:%.-_+~#=&]{1,256})["'`](?:[^>]*?\s+)?>([-a-zA-Z0-9@:%.-_+~#=&]{1,256})<\/a>/;

// Detects a full http:// or https:// URL embedded anywhere in a string. An
// explicit scheme leaves little room for false positives, so these are accepted
// without any further validation.
const EMBEDDED_HTTP_URL_REGEX = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/;

// Broad first-pass link detection and dissection. A regex cannot catch
// everything, so the result is validated again afterwards.
//
// Group 1  "(^|\s+)"
//   Start of string or whitespace, so URLs glued to other text are not
//   detected (Ex. tacohttps://facebook.com).
// Group 2  "(https:\/\/|http:\/\/|ftp:\/\/)?"                      Ex. https://
//   Optional scheme.
// Group 3  "((?![.+~#=@:%-])(?!.*[.-]{2})[a-zA-Z0-9@:%._+~#=-]{1,256}\.)?"   Ex. www.
//   Optional leading label(s) ending in "."; must not start with one of
//   .+~#=@:%- and the rest of the line must not contain two consecutive
//   characters out of "." and "-".
// Group 4  "((?![.+~#=@:-])(?!.*[.-]{2})[a-zA-Z0-9@:%._+~#=-]{1,256})"        Ex. google
//   Same as group 3, but "%" is allowed as first character (URI encoding of
//   non-ascii names) and there is no trailing ".".
// Group 5  "(\.|:)"
//   A period (TLD follows) or a colon (port follows).
// Group 6  "([a-zA-Z0-9()%]{1,256})"                                Ex. com
//   TLD or port number.
// Group 7  "(\b[/]{1}[a-zA-Z0-9()@:%_+.~#?&/=-]*)?"                 Ex. /meet
//   Optional path, which must start with "/".
const GENERAL_URL_REGEX = /(^|\s+)(https:\/\/|http:\/\/|ftp:\/\/)?((?![.+~#=@:%-])(?!.*[.-]{2})[a-zA-Z0-9@:%._+~#=-]{1,256}\.)?((?![.+~#=@:-])(?!.*[.-]{2})[a-zA-Z0-9@:%._+~#=-]{1,256})(\.|:)([a-zA-Z0-9()%]{1,256})(\b[/]{1}[a-zA-Z0-9()@:%_+.~#?&/=-]*)?/;

// Simple check for a port number on links that include a port.
const PORT_REGEX = /^((6553[0-5])|(655[0-2][0-9])|(65[0-4][0-9]{2})|(6[0-4][0-9]{3})|([1-5][0-9]{4})|([0-5]{0,5})|([0-9]{1,4}))$/

// True only when the candidate STARTS with one of the schemes GENERAL_URL_REGEX
// can capture. A plain substring search for "http" is not enough: it also hits
// hosts and paths such as "httpbin.org" or "example.com/http".
const EXPLICIT_SCHEME_REGEX = /^(?:https?|ftp):\/\//i;

// Decides whether a GENERAL_URL_REGEX match has a plausible URL structure:
// either group 6 is a known top level domain, or the candidate has a port / IP
// shape together with a leading label (group 3) and a port-like group 6.
const hasValidStructure = (match, testedString) => {
  const suffix = match[6];
  if (tld.includes(suffix.toUpperCase())) {
    return true;
  }
  // A colon separator means a port follows; exactly four dot-separated parts
  // is treated as an IP address.
  const hasPortOrIsIp = match[5] === ':' || testedString.split('.').length === 4;
  return hasPortOrIsIp && Boolean(match[3]) && PORT_REGEX.test(suffix);
};

/**
 * Checks whether a string contains something that looks like a URL.
 *
 * Detection order: anchor tag with href, explicit http(s):// URL, then the
 * general pattern whose result is validated against the TLD list / port rules
 * and finally parsed with the URL constructor.
 *
 * @param {*} encodedString - Value to inspect; anything but a string yields false.
 * @returns {RegExpMatchArray|false} The match array of whichever pattern hit
 *   (index 0 holds the matched text, possibly with leading whitespace for the
 *   general pattern), or false when nothing URL-like was found.
 */
export const containsPotentialUrl = (encodedString) => {
  if (typeof encodedString !== 'string') return false;

  let stringToTest = encodedString;
  if (stringToTest.includes(EDITED_MESSAGE_MARKER)) {
    stringToTest = stringToTest.split(EDITED_MESSAGE_MARKER)[1];
  }
  // Unwrap "(...)" so the trailing parenthesis is not taken as part of the link path
  if (stringToTest.startsWith('(') && stringToTest.endsWith(')')) {
    stringToTest = stringToTest.slice(1, -1);
  }

  // Anchor tags and explicit http(s) URLs are accepted as they are
  const strictMatch = stringToTest.match(ANCHOR_TAG_REGEX) ?? stringToTest.match(EMBEDDED_HTTP_URL_REGEX);
  if (strictMatch) {
    return strictMatch;
  }

  const match = stringToTest.match(GENERAL_URL_REGEX);
  if (!match || !hasValidStructure(match, stringToTest)) {
    return false;
  }

  // The URL constructor needs a scheme, so add one only when the candidate
  // does not already start with one.
  const candidate = match[0].trim();
  const url = EXPLICIT_SCHEME_REGEX.test(candidate) ? candidate : `https://${candidate}`;
  try {
    // A username component means the text is an email-like "user@host", not a link
    return new URL(url).username ? false : match;
  } catch {
    // The URL constructor throws on anything it cannot parse: not a link
    return false;
  }
}

/**
 * Wraps every URL-like token of a message in an anchor tag.
 *
 * @param {string} string - Message text; existing anchor tags are stripped first.
 * @returns {Promise<string|false|undefined>} The message with detected links
 *   replaced by `<a href="..." target="_blank" rel="noreferrer">` elements,
 *   false when no link was detected, or undefined for empty / non-string input.
 */
export const containsUrl = async (string) => {
  if (!string || typeof string !== 'string') {
    return;
  }

  // Filter out anchor tags to prevent conflicts with anchor tag wrapping
  let message = string.replace(/<\/?a[^>]*>/g, "");

  // Sanitize and turn every whitespace character into a plain space before
  // splitting, so new lines etc. separate tokens as well.
  // NOTE(review): links are detected on this sanitized copy, but replaced in
  // (and returned from) the unsanitized `message`. If the sanitizer alters a
  // token, the replace below will not find it and the anchor ends up in the
  // wrong place; callers presumably sanitize the result before rendering -
  // confirm.
  const tokens = DOMPurify.sanitize(message).replace(/\s/g, " ").split(" ");

  const links = [];
  for (const token of tokens) {
    // Trim string piece to better detect structure of non-ascii links
    const candidate = token.trim();

    // Time-like strings (Ex. 10:30) are never links
    if (/^\d{1,2}:\d{2}$/.test(candidate)) {
      continue;
    }

    // Ideally, later we will convert the un-encoded string to punycode to detect non-ascii TLDs Ex. XN--1CK2E1B
    // and do a separate check against our list of TLDs
    const match = containsPotentialUrl(candidate);
    if (match) {
      links.push(match[0].trim());
    }
  }

  if (links.length === 0) {
    return false;
  }

  const anchors = [];
  for (const detected of links) {
    // Remove one trailing "?" or "." (sentence punctuation) from the match
    const link = detected.endsWith('?') || detected.endsWith('.') ? detected.slice(0, -1) : detected;
    if (!link) {
      continue;
    }
    // Add https:// for better navigation when clicked, but only when the link
    // does not already START with a scheme. A substring search for "http"
    // would skip hosts such as "httpbin.org" and double-prefix "ftp://" links.
    const href = /^(?:https?|ftp):\/\//i.test(link) ? link : `https://${link}`;
    anchors.push(`<a href="${href}" target="_blank" rel="noreferrer">${link}</a>`);
    // Mark the first occurrence of the link with a <link> placeholder
    message = message.replace(link, `<link>`);
  }

  // All at once, swap the n-th <link> placeholder for the n-th anchor element
  return message
    .split('<link>')
    .reduce((html, segment, i) => (i < anchors.length ? html + segment + anchors[i] : html + segment), '');
};