LINUX.ORG.RU

История изменений

Исправление rtxtxtrx (текущая версия):

#!/usr/bin/env python3
import difflib
import re
import sys

import requests


def unhtml(s: str) -> str:
    """Return *s* with every ``<...>`` tag-like span removed.

    A span is an opening ``<``, one or more characters that are neither
    ``<`` nor ``>``, and a closing ``>``. Text outside such spans is kept
    unchanged; an empty ``<>`` is not treated as a tag.
    """
    tag_pattern = re.compile(r'<[^<>]+>')
    return tag_pattern.sub('', s)


if __name__ == '__main__':
    # Two positional arguments are required: the URLs of the pages to compare.
    if len(sys.argv) < 3:
        print('Usage:', sys.argv[0], 'URL1', 'URL2')
        sys.exit(1)

    # Download both pages. Without an explicit timeout requests may block
    # indefinitely on an unresponsive server, and without raise_for_status()
    # an HTTP error page would be diffed as if it were the article.
    pages = []
    for url in sys.argv[1:3]:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pages.append(response.text)
    article1, article2 = pages

    # Compare the tag-stripped texts word by word (split on whitespace) and
    # print one Differ output line per word.
    d = difflib.Differ()
    diff = d.compare(unhtml(article1).split(), unhtml(article2).split())
    print('\n'.join(diff))

Исправление rtxtxtrx:

import difflib
import re

def unhtml(s: str) -> str:
    """Strip tag-like ``<...>`` spans from *s* and return the remaining text.

    The string is cut at every match of the tag pattern and the pieces
    between the matches are glued back together, which yields the same
    result as substituting each match with an empty string.
    """
    pieces = re.split(r'<[^<>]+>', s)
    return ''.join(pieces)

# Word-level diff of the two articles: strip the markup, split each text on
# whitespace, and print one Differ output line per word.
d = difflib.Differ()
words1 = unhtml(article1).split()
words2 = unhtml(article2).split()
diff = d.compare(words1, words2)
print(*diff, sep='\n')

Исходная версия rtxtxtrx:

import difflib
import re

def unhtml(s: str) -> str:
    """Return *s* with every ``<...>`` tag-like span removed.

    A span is ``<``, one or more characters other than ``<`` and ``>``,
    then ``>``. Everything else in the string is returned unchanged.
    """
    # re.sub takes (pattern, repl, string); the replacement must be passed
    # explicitly, otherwise the call raises TypeError.
    return re.sub(r'<[^<>]+>', '', s)

# Build a word-by-word comparison of the two tag-stripped articles and print
# the Differ output, one line per word.
d = difflib.Differ()
left_tokens, right_tokens = unhtml(article1).split(), unhtml(article2).split()
diff = d.compare(left_tokens, right_tokens)
report = '\n'.join(diff)
print(report)