История изменений

Исправление Nervous, 15.06.24 08:05 (текущая версия):

(ns find-successive-chars
  (:require
   [clojure.java.io :as io]
   [clojure.string :as s]))

(defn successive-chars-regex
  "Returns a regex that matches a whole string containing (unicode) `chars`
  in order.

  Every char is emitted as a four-digit unicode escape, so regex
  metacharacters in `chars` are matched literally. Before, between and after
  the required chars only characters that are NOT in `chars` are allowed, so
  each listed char has to occur exactly as many times as it is listed.

  An empty (or nil) `chars` yields a regex that matches any string; building
  the pattern naively would produce the uncompilable class `[^]*`."
  [chars]
  (if (empty? chars)
    ;; Nothing is required and nothing is excluded: every string qualifies.
    #"(?s).*"
    (let [unicode-escaped-chars    (mapv #(format "\\u%04x" (int %)) chars)
          exclude-chars-subpattern (format "[^%s]*" (s/join unicode-escaped-chars))]
      ;; gap c1 gap c2 gap ... cn gap
      (re-pattern
       (str exclude-chars-subpattern
            (s/join exclude-chars-subpattern unicode-escaped-chars)
            exclude-chars-subpattern)))))

(defn successive-chars?
  "Tells whether the whole `string` is matched by the regex that
  `successive-chars-regex` builds for `chars`; always returns true or false."
  [chars string]
  (let [pattern (successive-chars-regex chars)]
    ;; re-matches yields the matched text or nil, never false.
    (some? (re-matches pattern string))))

(defn word-seq
  "Splits `string` into its words, where a word is a maximal run of unicode
  letters. Returns nil when `string` contains no letters."
  [string]
  (let [letters #"\p{L}+"]
    (re-seq letters string)))

(defn find-words
  "Returns a sequence of words in `file` that contain (unicode) `chars` in order.

  `file` is anything `clojure.java.io/reader` accepts. Each line is split
  into words with `word-seq`, and a word is kept when the regex from
  `successive-chars-regex` matches it entirely.

  The reader is closed before this function returns, so the result is fully
  realized rather than lazy."
  [file chars]
  ;; Compile the regex once instead of once per word.
  (let [pattern (successive-chars-regex chars)]
    (with-open [rdr (io/reader file)]
      (->> (line-seq rdr)
           (mapcat word-seq)
           (filter #(re-matches pattern %))
           ;; Force the lazy pipeline while the reader is still open.
           doall))))

;; Example: collect the words of the given text file that match the letters
;; е, ч, н, о in that order. The `;; =>` line below is the recorded result.
(find-words "/tmp/Звёздная пехота.txt" [\е \ч \н \о])
;; => ("вечно" "вечной" "величиной" "пессимистично" "вечно" "величиной" "безразлично" "вечной")

Исправление Nervous, 15.06.24 08:04:

(ns find-successive-chars
  (:require
   [clojure.java.io :as io]
   [clojure.string :as s]))

(defn successive-chars-regex
  "Returns a regex that matches a whole string containing the given (unicode)
  `chars` in order.

  Chars are written into the pattern as four-digit unicode escapes, so they
  are always taken literally. The stretches before, between and after the
  required chars may only hold characters that are not in `chars`.

  When `chars` is empty the result matches any string; without this guard the
  generated class `[^]*` would fail to compile."
  [chars]
  (if-let [escaped (seq (map #(format "\\u%04x" (int %)) chars))]
    (let [gap (format "[^%s]*" (s/join escaped))]
      ;; gap c1 gap c2 gap ... cn gap
      (re-pattern (str gap (s/join gap escaped) gap)))
    ;; No chars requested: nothing to require, nothing to exclude.
    #"(?s).*"))

(defn successive-chars?
  "Predicate: true when the regex built from `chars` by
  `successive-chars-regex` matches all of `string`, false otherwise."
  [chars string]
  (-> chars
      successive-chars-regex
      (re-matches string)
      boolean))

(defn word-seq
  "Extracts the words (runs of one or more unicode letters) from `string`,
  in order of appearance. Yields nil if there are none."
  [string]
  (->> string
       (re-seq #"\p{L}+")))

(defn find-words
  "Returns a sequence of words in `file` that contain (unicode) `chars` in order.

  `file` is passed to `clojure.java.io/reader`. Lines are broken into words
  by `word-seq`; a word is reported when the regex produced by
  `successive-chars-regex` matches the whole word.

  The reader is opened with `with-open` and closed on exit, therefore the
  returned sequence is realized eagerly."
  [file chars]
  ;; The pattern depends only on `chars`, so build it once up front.
  (let [pattern (successive-chars-regex chars)]
    (with-open [rdr (io/reader file)]
      ;; doall: consume every line before with-open closes the reader.
      (doall
       (for [line  (line-seq rdr)
             word  (word-seq line)
             :when (re-matches pattern word)]
         word)))))

;; Example call: search the given text file for words matching the letters
;; е, ч, н, о in that order. The `;; =>` line below is the recorded result.
(find-words "/tmp/Звёздная пехота.txt" [\е \ч \н \о])
;; => ("вечно" "вечной" "величиной" "пессимистично" "вечно" "величиной" "безразлично" "вечной")

Исправление Nervous, 15.06.24 07:52:

(ns find-successive-chars
  (:require
   [clojure.java.io :as io]
   [clojure.string :as s]))

(defn successive-chars-regex
  "Returns a regex that matches a whole string containing the given (unicode)
  chars in order.

  Each char becomes a four-digit unicode escape, which keeps regex
  metacharacters literal. Around and between the required chars only
  characters outside `chars` are permitted.

  For an empty `chars` a match-anything regex is returned, because the
  exclusion class would otherwise be the invalid `[^]*`."
  [chars]
  (let [escaped (mapv #(format "\\u%04x" (int %)) chars)]
    (if (seq escaped)
      (let [gap (format "[^%s]*" (s/join escaped))]
        ;; Start with a gap, then append every char followed by a gap.
        (re-pattern
         (reduce (fn [pattern ch] (str pattern ch gap)) gap escaped)))
      ;; No constraints at all.
      #"(?s).*")))

(defn successive-chars?
  "Checks `string` against the regex that `successive-chars-regex` builds
  for `chars`. Returns true on a full match and false otherwise."
  [chars string]
  (if (re-matches (successive-chars-regex chars) string)
    true
    false))

(defn word-seq
  "Lists every word of `string`, a word being a run of unicode letters.
  The result is nil when no letters are present."
  [string]
  (-> #"\p{L}+"
      (re-seq string)))

(defn find-words
  "Returns a sequence of words in `file` that contain (unicode) `chars` in order.

  `file` is opened via `clojure.java.io/reader`; words come from `word-seq`
  and are kept when fully matched by the regex from `successive-chars-regex`.

  The whole file is processed before returning so that the reader can be
  closed; the result is not lazy."
  [file chars]
  (let [pattern   (successive-chars-regex chars) ; compiled once, reused per word
        matching  (comp (mapcat word-seq)
                        (filter #(re-matches pattern %)))]
    (with-open [rdr (io/reader file)]
      ;; Realize everything while the reader is still open.
      (doall (sequence matching (line-seq rdr))))))

;; Example usage: look through the given text file for words matching the
;; letters е, ч, н, о in that order. The `;; =>` line is the recorded result.
(find-words "/tmp/Звёздная пехота.txt" [\е \ч \н \о])
;; => ("вечно" "вечной" "величиной" "пессимистично" "вечно" "величиной" "безразлично" "вечной")

Исправление Nervous, 15.06.24 07:51:

(ns find-successive-chars
  (:require
   [clojure.java.io :as io]
   [clojure.string :as s]))

(defn successive-chars-regex
  "Returns a regex that matches a whole string containing the given (unicode)
  chars in order.

  The chars are inserted as four-digit unicode escapes and are therefore
  matched literally. Any text before, between or after them must consist of
  characters that are not among `chars`.

  With no chars the function returns a regex accepting every string instead
  of trying to compile the malformed class `[^]*`."
  [chars]
  (if (seq chars)
    (let [escaped (map #(format "\\u%04x" (int %)) chars)
          gap     (format "[^%s]*" (s/join escaped))]
      ;; gap c1 gap c2 gap ... cn gap
      (re-pattern (str gap (apply str (interpose gap escaped)) gap)))
    ;; Empty input: no required chars and no excluded chars.
    #"(?s).*"))

(defn successive-chars?
  "Returns true if the regex built by `successive-chars-regex` for the given
  chars matches the entire string, and false if it does not."
  [chars string]
  (let [pattern (successive-chars-regex chars)
        hit     (re-matches pattern string)]
    ;; A successful re-matches is never nil.
    (not (nil? hit))))

(defn word-seq
  "Produces the words found in the string; each word is a maximal run of
  unicode letters. Gives nil for a string without letters."
  [string]
  (let [word-pattern #"\p{L}+"
        words        (re-seq word-pattern string)]
    words))

(defn find-words
  "Returns a sequence of words in `file` that contain `chars` in order.

  `file` is handed to `clojure.java.io/reader`. Every line is split with
  `word-seq`, and a word is returned when the regex from
  `successive-chars-regex` matches it in full.

  The reader is always closed before returning, so the sequence is realized
  eagerly instead of lazily."
  [file chars]
  ;; One regex for the whole scan rather than one per candidate word.
  (let [pattern (successive-chars-regex chars)
        match?  (partial re-matches pattern)]
    (with-open [rdr (io/reader file)]
      (->> (line-seq rdr)
           (mapcat word-seq)
           (filter match?)
           ;; Must be forced before with-open closes the reader.
           doall))))

;; Example: run the search over the given text file for words matching the
;; letters е, ч, н, о in that order. The `;; =>` line is the recorded result.
(find-words "/tmp/Звёздная пехота.txt" [\е \ч \н \о])
;; => ("вечно" "вечной" "величиной" "пессимистично" "вечно" "величиной" "безразлично" "вечной")

Исходная версия Nervous, 15.06.24 07:47:

(ns find-successive-chars
  (:require
   [clojure.java.io :as io]
   [clojure.string :as s]))

(defn successive-chars-regex
  "Returns a regex that matches a whole string containing the given (unicode)
  chars in order.

  Chars go into the pattern as four-digit unicode escapes, so none of them is
  interpreted as a regex operator. Everything before, between and after the
  required chars has to be made of characters not listed in `chars`.

  An empty `chars` gives a regex that matches any string; the unguarded
  construction would yield `[^]*`, which is not a valid pattern."
  [chars]
  (if (empty? chars)
    ;; Vacuous requirement: accept everything.
    #"(?s).*"
    (let [escaped (map #(format "\\u%04x" (int %)) chars)
          gap     (format "[^%s]*" (s/join escaped))]
      (->> escaped
           ;; Follow each required char with a gap ...
           (mapcat (fn [ch] [ch gap]))
           ;; ... and put one more gap in front.
           (apply str gap)
           re-pattern))))

(defn successive-chars?
  "Answers whether the entire string matches the regex that
  `successive-chars-regex` derives from the given chars."
  [chars string]
  (let [matcher (re-matcher (successive-chars-regex chars) string)]
    ;; Matcher.matches tests the whole input, just like re-matches does.
    (.matches matcher)))


(defn word-seq
  "Gives the sequence of words in the string, treating each run of unicode
  letters as one word. Returns nil if the string has no letters."
  [string]
  (let [extract-words (partial re-seq #"\p{L}+")]
    (extract-words string)))

(defn find-words
  "Returns a sequence of words in `file` that contain `chars` in order.

  `file` is read through `clojure.java.io/reader`. Lines are split into
  words by `word-seq`; the words fully matched by the regex from
  `successive-chars-regex` are returned in file order.

  The result is realized completely before returning, which lets the reader
  be closed here instead of being left open."
  [file chars]
  (let [pattern (successive-chars-regex chars)] ; hoisted out of the per-word check
    (with-open [rdr (io/reader file)]
      (let [words (mapcat word-seq (line-seq rdr))]
        ;; doall forces the lazy chain while `rdr` is still open.
        (doall (filter #(re-matches pattern %) words))))))

;; Example: find the words in the given text file that match the letters
;; е, ч, н, о in that order. The `;; =>` line below is the recorded result.
(find-words "/tmp/Звёздная пехота.txt" [\е \ч \н \о])
;; => ("вечно" "вечной" "величиной" "пессимистично" "вечно" "величиной" "безразлично" "вечной")