LINUX.ORG.RU

Emacs, добавление заголовка к ссылке в org-mode (2)

 , ,


0

1

Старая тема (Emacs, добавление заголовка к ссылке в org-mode) уже в архиве. Но прогресс не остановить.

Понадобилось добавить отмену по timeout. Спросил у ChatGPT o1-preview. Он выдал мне такой рабочий код:

(defun org-link-describe (url &optional descr)
  "Retrieve the HTML title from a URL with a 3-second timeout using request.el."
  (require 'request)
  (require 'dom)
  (let ((result nil)
        (done nil)
        (timeout 3)
        (request-backend 'url-retrieve))  ;; Use url-retrieve backend
    ;; Send the HTTP request
    (request
     url
     :timeout timeout
     :headers '(("User-Agent" . "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \
(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"))
     :parser (lambda ()
               ;; Parse the HTML content into a DOM
               (libxml-parse-html-region (point) (point-max)))
     :success (cl-function
               (lambda (&key data &allow-other-keys)
                 ;; Extract the <title> element
                 (let ((title-node (dom-by-tag data 'title)))
                   (when title-node
                     (setq result (dom-text (car title-node)))))
                 (setq done t)))  ;; Ensure this is inside the lambda
     :error (cl-function
             (lambda (&rest args &key error-thrown &allow-other-keys)
               (message "Error fetching URL: %s" error-thrown)
               (setq done t))))   ;; Ensure this is inside the lambda
    ;; Wait for the request to finish or timeout
    (let ((wait-time 0)
          (step 0.1))
      (while (and (not done)
                  (< wait-time timeout))
        (sleep-for step)
        (setq wait-time (+ wait-time step))))
    result))

;; Fill a description for a link
(setq org-make-link-description-function 'org-link-describe)

Но этот код не работает со страницами в кодировке koi8-r, например, opennet. Предложения по улучшению кода всячески приветствуются (я не знаю ELisp).



Последнее исправление: o_-- (всего исправлений: 1)

Вот последняя попытка (неудачная) нейросети решить проблему с кодировкой.

(defun org-link-describe (url &optional descr)
  "Retrieve the HTML title from a URL with a 3-second timeout using request.el."
  (require 'request)
  (require 'dom)
  (require 'mm-util)
  (let ((request-backend 'curl))  ;; Use curl as the backend
    (org-link-describe-fetch-title url)))

(defun org-link-describe-fetch-title (url)
  "Fetch the HTML title from the URL with proper timeout and encoding handling."
  (let ((result nil)
        (done nil)
        (timeout 3))
    ;; Send the HTTP request
    (request
     url
     :timeout timeout
     :headers '(("User-Agent" . "Mozilla/5.0 (compatible; Emacs)")
                ("Accept-Encoding" . "gzip, deflate"))
     :parser #'org-link-describe-parse-response
     :success (cl-function
               (lambda (&key data &allow-other-keys)
                 (setq result data)
                 (setq done t)))
     :error (cl-function
             (lambda (&rest _args &key error-thrown &allow-other-keys)
               (message "Error fetching URL: %s" error-thrown)
               (setq done t))))
    ;; Wait for the request to finish or timeout
    (let ((wait-time 0)
          (step 0.1))
      (while (and (not done)
                  (< wait-time timeout))
        (sleep-for step)
        (setq wait-time (+ wait-time step))))
    result))

(defun org-link-describe-parse-response ()
  "Parse the HTTP response to extract the HTML title."
  (let* ((response request)
         ;; Get Content-Type header
         (content-type (or (request-response-header response "Content-Type") ""))
         ;; Extract charset from Content-Type header
         (charset (org-link-describe-extract-charset content-type)))
    ;; If charset not found in Content-Type, look for meta tag
    (unless charset
      (setq charset (org-link-describe-extract-charset-from-meta)))
    ;; Default to utf-8 if charset is still not found
    (unless charset
      (setq charset "utf-8"))
    ;; Convert charset to coding system
    (let ((coding-system (or (mm-charset-to-coding-system (downcase charset))
                             'utf-8)))
      ;; Decode the buffer using the detected coding system
      (if coding-system
          (mm-decode-coding-region (point-min) (point-max) coding-system)
        (message "Unknown charset %s, defaulting to utf-8" charset)
        (mm-decode-coding-region (point-min) (point-max) 'utf-8)))
    ;; Parse the buffer into a DOM
    (let ((dom (libxml-parse-html-region (point-min) (point-max))))
      ;; Extract and return the <title> text
      (let ((title-node (dom-by-tag dom 'title)))
        (when title-node
          (dom-text (car title-node)))))))

(defun org-link-describe-extract-charset (content-type)
  "Extract charset from Content-Type header."
  (when (string-match "charset=\\([^;]+\\)" content-type)
    (match-string 1 content-type)))

(defun org-link-describe-extract-charset-from-meta ()
  "Extract charset from HTML meta tags."
  (goto-char (point-min))
  (when (re-search-forward "<meta[^>]*charset=[\"']?\\([^\"'/> ]+\\)" nil t)
    (match-string 1)))
o_--
() автор топика

страницами в кодировке koi8-r, например, opennet

Ух ты там и правда koi8. Молодцы.

firkax ★★★★★
()
Для того чтобы оставить комментарий войдите или зарегистрируйтесь.