Comparez deux textes HTML et marquez les différences
P粉478445671
P粉478445671 2023-09-10 14:19:08
0
1
573

Je veux marquer la différence entre deux textes HTML, j'ai trouvé une solution, tout fonctionne comme prévu (les fichiers supprimés reçoivent le nom de classe "del", le texte inséré reçoit le nom de classe "ins" et les styles associés), il n'y a qu'une seule chose qui ne va pas - pour une raison quelconque, le texte est marqué au mauvais endroit.

var Match,
  calculate_operations,
  consecutive_where,
  create_index,
  diff,
  find_match,
  find_matching_blocks,
  html_to_tokens,
  is_end_of_tag,
  is_start_of_tag,
  is_tag,
  is_whitespace,
  isnt_tag,
  op_map,
  define,
  recursively_find_matching_blocks,
  render_operations,
  wrap;

is_end_of_tag = function (char) {
  return char === '>';
};

is_start_of_tag = function (char) {
  return char === '<';
};

is_whitespace = function (char) {
  return /^\s+$/.test(char);
};

is_tag = function (token) {
  return /^\s*<[^>]+>\s*$/.test(token);
};

isnt_tag = function (token) {
  return !is_tag(token);
};

Match = class Match {
  constructor(start_in_before1, start_in_after1, length1) {
    console.log('lalala');

    this.start_in_before = start_in_before1;
    this.start_in_after = start_in_after1;
    this.length = length1;
    this.end_in_before = this.start_in_before + this.length - 1;
    this.end_in_after = this.start_in_after + this.length - 1;
  }
};

html_to_tokens = function (html) {
  var char, current_word, i, len, mode, words, letters;
  mode = 'char';
  current_word = '';
  // letters = [];
  words = [];
  for (i = 0, len = html.length; i < len; i++) {
    char = html[i];
    // letters.push(char);
    switch (mode) {
      case 'tag':
        if (is_end_of_tag(char)) {
          current_word += '>';
          words.push(current_word);
          current_word = '';
          if (is_whitespace(char)) {
            mode = 'whitespace';
          } else {
            mode = 'char';
          }
        } else {
          current_word += char;
        }
        break;
      case 'char':
        if (is_start_of_tag(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = '<';
          mode = 'tag';
        } else if (/\s/.test(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
          mode = 'whitespace';
        } else if (/[\w\#@]+/i.test(char)) {
          current_word += char;
        } else {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
        }
        break;
      case 'whitespace':
        if (is_start_of_tag(char)) {
          if (current_word) {
            words.push(current_word);
          }
          current_word = '<';
          mode = 'tag';
        } else if (is_whitespace(char)) {
          current_word += char;
        } else {
          if (current_word) {
            words.push(current_word);
          }
          current_word = char;
          mode = 'char';
        }
        break;
      default:
        throw new Error(`Unknown mode ${mode}`);
    }
  }
  if (current_word) {
    words.push(current_word);
  }

  return words;
};

find_match = function (
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after
) {
  var best_match_in_after,
    best_match_in_before,
    best_match_length,
    i,
    index_in_after,
    index_in_before,
    j,
    len,
    locations_in_after,
    looking_for,
    match,
    match_length_at,
    new_match_length,
    new_match_length_at,
    ref,
    ref1;
  best_match_in_before = start_in_before;
  best_match_in_after = start_in_after;
  best_match_length = 0;
  match_length_at = {};
  for (
    index_in_before = i = ref = start_in_before, ref1 = end_in_before;
    ref <= ref1 ? i < ref1 : i > ref1;
    index_in_before = ref <= ref1 ? ++i : --i
  ) {
    new_match_length_at = {};
    looking_for = before_tokens[index_in_before];
    locations_in_after = index_of_before_locations_in_after_tokens[looking_for];

    for (j = 0, len = locations_in_after.length; j < len; j++) {
      index_in_after = locations_in_after[j];
      if (index_in_after < start_in_after) {
        continue;
      }
      if (index_in_after >= end_in_after) {
        break;
      }
      if (match_length_at[index_in_after - 1] == null) {
        match_length_at[index_in_after - 1] = 0;
      }
      new_match_length = match_length_at[index_in_after - 1] + 1;
      new_match_length_at[index_in_after] = new_match_length;
      if (new_match_length > best_match_length) {
        best_match_in_before = index_in_before - new_match_length + 1;
        best_match_in_after = index_in_after - new_match_length + 1;
        best_match_length = new_match_length;
      }
    }
    match_length_at = new_match_length_at;
  }
  if (best_match_length !== 0) {
    match = new Match(
      best_match_in_before,
      best_match_in_after,
      best_match_length
    );
  }
  return match;
};

recursively_find_matching_blocks = function (
  before_tokens,
  after_tokens,
  index_of_before_locations_in_after_tokens,
  start_in_before,
  end_in_before,
  start_in_after,
  end_in_after,
  matching_blocks
) {
  var match;
  match = find_match(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    start_in_before,
    end_in_before,
    start_in_after,
    end_in_after
  );
  if (match != null) {
    if (
      start_in_before < match.start_in_before &&
      start_in_after < match.start_in_after
    ) {
      recursively_find_matching_blocks(
        before_tokens,
        after_tokens,
        index_of_before_locations_in_after_tokens,
        start_in_before,
        match.start_in_before,
        start_in_after,
        match.start_in_after,
        matching_blocks
      );
    }
    matching_blocks.push(match);
    if (
      match.end_in_before <= end_in_before &&
      match.end_in_after <= end_in_after
    ) {
      recursively_find_matching_blocks(
        before_tokens,
        after_tokens,
        index_of_before_locations_in_after_tokens,
        match.end_in_before + 1,
        end_in_before,
        match.end_in_after + 1,
        end_in_after,
        matching_blocks
      );
    }
  }
  return matching_blocks;
};

create_index = function (p) {
  var i, idx, index, len, ref, token;
  if (p.find_these == null) {
    throw new Error('params must have find_these key');
  }
  if (p.in_these == null) {
    throw new Error('params must have in_these key');
  }
  index = {};
  const arr = html_to_tokens(p.find_these);
  const arr_number = [];
  for (var i = 0; i < arr.length; i++) {
    arr_number.push(arr[i].length);
  }
  ref = p.find_these;

  for (i = 0, len = ref.length; i < len; i++) {
    token = ref[i];
    index[token] = [];
    idx = p.in_these.indexOf(token);
    while (idx !== -1) {
      index[token].push(idx);
      idx = p.in_these.indexOf(token, idx + 1);
    }
  }
  console.log(token, 'token');
  console.log(arr_number, 'arr_number');
  return index;
};

find_matching_blocks = function (before_tokens, after_tokens) {
  var index_of_before_locations_in_after_tokens, matching_blocks;
  matching_blocks = [];
  index_of_before_locations_in_after_tokens = create_index({
    find_these: before_tokens,
    in_these: after_tokens,
  });
  console.log(
    index_of_before_locations_in_after_tokens,
    'index_of_before_locations_in_after_tokens '
  );
  return recursively_find_matching_blocks(
    before_tokens,
    after_tokens,
    index_of_before_locations_in_after_tokens,
    0,
    before_tokens.length,
    0,
    after_tokens.length,
    matching_blocks
  );
};

calculate_operations = function (before_tokens, after_tokens) {
  var action_map,
    action_up_to_match_positions,
    i,
    index,
    is_single_whitespace,
    j,
    last_op,
    len,
    len1,
    match,
    match_starts_at_current_position_in_after,
    match_starts_at_current_position_in_before,
    matches,
    op,
    operations,
    position_in_after,
    position_in_before,
    post_processed;
  if (before_tokens == null) {
    throw new Error('before_tokens?');
  }
  if (after_tokens == null) {
    throw new Error('after_tokens?');
  }
  position_in_before = position_in_after = 0;
  operations = [];
  action_map = {
    'false,false': 'replace',
    'true,false': 'insert',
    'false,true': 'delete',
    'true,true': 'none',
  };
  matches = find_matching_blocks(before_tokens, after_tokens);
  console.log(matches, 'matches');
  matches.push(new Match(before_tokens.length, after_tokens.length, 0));
  for (index = i = 0, len = matches.length; i < len; index = ++i) {
    match = matches[index];
    match_starts_at_current_position_in_before =
      position_in_before === match.start_in_before;
    match_starts_at_current_position_in_after =
      position_in_after === match.start_in_after;
    action_up_to_match_positions =
      action_map[
        [
          match_starts_at_current_position_in_before,
          match_starts_at_current_position_in_after,
        ].toString()
      ];
    if (action_up_to_match_positions !== 'none') {
      operations.push({
        action: action_up_to_match_positions,
        start_in_before: position_in_before,
        end_in_before:
          action_up_to_match_positions !== 'insert'
            ? match.start_in_before - 1
            : void 0,
        start_in_after: position_in_after,
        end_in_after:
          action_up_to_match_positions !== 'delete'
            ? match.start_in_after - 1
            : void 0,
      });
    }
    if (match.length !== 0) {
      operations.push({
        action: 'equal',
        start_in_before: match.start_in_before,
        end_in_before: match.end_in_before,
        start_in_after: match.start_in_after,
        end_in_after: match.end_in_after,
      });
    }
    position_in_before = match.end_in_before + 1;
    position_in_after = match.end_in_after + 1;
  }
  post_processed = [];
  last_op = {
    action: 'none',
  };
  is_single_whitespace = function (op) {
    if (op.action !== 'equal') {
      return false;
    }
    if (op.end_in_before - op.start_in_before !== 0) {
      return false;
    }
    return /^\s$/.test(
      before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9)
    );
  };
  for (j = 0, len1 = operations.length; j < len1; j++) {
    op = operations[j];
    if (
      (is_single_whitespace(op) && last_op.action === 'replace') ||
      (op.action === 'replace' && last_op.action === 'replace')
    ) {
      last_op.end_in_before = op.end_in_before;
      last_op.end_in_after = op.end_in_after;
    } else {
      post_processed.push(op);
      last_op = op;
    }
  }
  return post_processed;
};

consecutive_where = function (start, content, predicate) {
  var answer, i, index, last_matching_index, len, token;
  content = content.slice(start, +content.length + 1 || 9e9);
  // console.log(content, 'content');
  // console.log(predicate, 'predicate');
  // console.log(start, 'start');

  last_matching_index = void 0;
  for (index = i = 0, len = content.length; i < len; index = ++i) {
    token = content[index];
    answer = predicate(token);
    if (answer === true) {
      last_matching_index = index;
    }
    if (answer === false) {
      break;
    }
  }
  if (last_matching_index != null) {
    return content.slice(0, +last_matching_index + 1 || 9e9);
  }
  return [];
};

wrap = function (tag, content) {
  var length, non_tags, position, rendering, tags;
  rendering = '';
  position = 0;
  length = content.length;
  while (true) {
    if (position >= length) {
      break;
    }
    non_tags = consecutive_where(position, content, isnt_tag);
    position += non_tags.length;
    if (non_tags.length !== 0) {
      rendering += `<${tag}>${non_tags.join('')}</${tag}>`;
    }
    if (position >= length) {
      break;
    }
    tags = consecutive_where(position, content, is_tag);
    position += tags.length;
    rendering += tags.join('');
  }
  return rendering;
};

op_map = {
  equal: function (op, before_tokens, after_tokens) {
    return before_tokens
      .slice(op.start_in_before, +op.end_in_before + 1 || 9e9)
      .join('');
  },
  insert: function (op, before_tokens, after_tokens) {
    var val;
    const token_length = after_tokens.map(element => element.length);
    val = after_tokens.slice(op.start_in_after, op.end_in_after + 1 || 9e9);

    return wrap('ins', val);
  },
  delete: function (op, before_tokens, after_tokens) {
    var val;
    val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);

    return wrap('del', val);
  },
};

op_map.replace = function (op, before_tokens, after_tokens) {
  return (
    op_map.delete(op, before_tokens, after_tokens) +
    op_map.insert(op, before_tokens, after_tokens)
  );
};

render_operations = function (before_tokens, after_tokens, operations) {
  var i, len, op, rendering;
  rendering = '';
  console.log(operations.length, 'operations.length');
  console.log(operations, 'operations');
  for (i = 0, len = operations.length; i < len; i++) {
    op = operations[i];

    rendering += op_map[op.action](op, before_tokens, after_tokens);
  }
  return rendering;
};

diff = function (before, after) {
  var ops;
  if (before === after) {
    return before;
  }
  before = html_to_tokens(before);
  after = html_to_tokens(after);
  ops = calculate_operations(before, after);
  return render_operations(before, after, ops);
};

diff.html_to_tokens = html_to_tokens;

diff.find_matching_blocks = find_matching_blocks;

find_matching_blocks.find_match = find_match;

find_matching_blocks.create_index = create_index;

diff.calculate_operations = calculate_operations;

diff.render_operations = render_operations;

if (typeof define === 'function') {
  define([], function () {
    return diff;
  });
} else if (typeof module !== 'undefined' && module !== null) {
  module.exports = diff;
} else {
  this.htmldiff = diff;
}

Appelez le code que j'ai écrit :

let oldTxt = Match.html_to_tokens(a);
    let newTxt = Match.html_to_tokens(b);
    let ops = Match.calculate_operations(a, b);
    const render = Match.render_operations(newTxt, oldTxt, ops);
    document.getElementById('output').innerHTML = render;

a est le premier code HTML, b est le code HTML modifié (non stringifié)

Des idées sur la façon de procéder ? Merci pour votre aide!

Je sais que l'erreur se trouve quelque part dans la fonction "create_index". Parce que je calcule la longueur du texte et que je la compare ensuite à la longueur du texte, chaque balise/mot compte pour un.

Code Sandbox

P粉478445671
P粉478445671

répondre à tous(1)
P粉677573079

Je l'ai résolu ! Pour tous ceux qui souhaitent utiliser ce code, activez simplement cette partie :

let oldTxt = Match.html_to_tokens(a);
    let newTxt = Match.html_to_tokens(b);
    let ops = Match.calculate_operations(a, b);
    const render = Match.render_operations(newTxt, oldTxt, ops);
    document.getElementById('output').innerHTML = render;

À ceci :

    a = Match.html_to_tokens(a);
    b = Match.html_to_tokens(b);
    let ops = Match.calculate_operations(a, b);
    console.log(ops, 'ops');
    const render = Match.render_operations(a, b, ops);
    document.getElementById('output').innerHTML = render;

où a et b sont deux textes html : a (premier), b (changement)

Vous pouvez essayer le nouveau code ici : CodeSandbox résolu !

Derniers téléchargements
Plus>
effets Web
Code source du site Web
Matériel du site Web
Modèle frontal