Very cool. I actually cleaned up the code a little bit more this
morning trying to speed things up a bit. It's still not as fast as I'd
like, but I'm not up to speed on Clojure optimization either, so I
could be missing something.
Revised code:
(ns markov
(use clojure.contrib.str-utils))
(defn flatten
  "Walks an arbitrarily nested sequential structure (lists, vectors,
  etc.) depth-first and returns a lazy, flat sequence of its
  non-sequential elements, in order.
  A nil or otherwise non-sequential argument yields an empty sequence."
  [x]
  (let [nodes (tree-seq sequential? seq x)]
    ;; The first node produced by tree-seq is the root itself; drop it,
    ;; then discard every remaining branch so only the leaves are left.
    (remove sequential? (rest nodes))))
(defn rand-elt
  "Returns a randomly chosen element of s, or nil when s is nil or empty.

  s may be any collection that `seq` accepts. Sequential collections
  (vectors, lists, lazy seqs) are indexed directly with `nth`; anything
  else (sets, maps, strings) is first converted with `seq` so that `nth`
  can be applied to it."
  [s]
  (when (seq s)
    ;; Guarding on (seq s) avoids calling nth with index 0 on an empty
    ;; collection, which throws for vectors and lists.
    (let [items (if (sequential? s) s (seq s))]
      (nth items (rand-int (count items))))))
(defn clean
  "Strips characters from txt that are disruptive to Markov chains and
  returns the cleaned string.

  Two passes are made:
    1. every colon, semicolon, comma, caret, double quote and
       parenthesis is removed;
    2. every apostrophe is removed unless it is immediately followed by
       d, t, ve, m, ll, s, de or re, so that contractions keep theirs."
  [txt]
  ;; re-gsub takes the string as its last argument, hence ->>.
  (->> txt
       (re-gsub #"[:;,^\"()]" "")
       (re-gsub #"'(?!(d|t|ve|m|ll|s|de|re))" "")))
(defn chain-lengths
  "Returns a set of lengths, one per element of markov-chain: for every
  element its keys are taken and counted, and the distinct counts are
  collected into a set.

  NOTE(review): this gives a meaningful answer when markov-chain is a
  collection of maps. If a single chain map is passed instead, each
  element is a [key value] entry rather than a map, so the result does
  not reflect the number of words per key -- confirm the intended input."
  [markov-chain]
  (set (map (comp count keys) markov-chain)))
(defn max-chain-length
  "Returns the length of the longest chain, i.e. the largest value
  reported by chain-lengths for markov-chain, or nil when there are no
  lengths to compare (empty input)."
  [markov-chain]
  (let [lengths (chain-lengths markov-chain)]
    ;; (apply max) with no arguments throws, so guard the empty case.
    (when (seq lengths)
      (apply max lengths))))
(defn chain
  "Builds a Markov chain (a map) from a sequence of words.

  Each key is a sequence of the `length` preceding words, padded with
  nil at the start of the input; each value holds the words seen
  directly after that key, most recent first. Every word is passed
  through `clean` before use. The key that remains once the input is
  exhausted is associated with an empty vector, replacing whatever was
  stored under it. `length` defaults to 3."
  ([words]
    (chain words 3))
  ([words length]
    (let [start-window (map (constantly nil) (range length))
          ;; One step: record `word` as a follower of the current window,
          ;; then slide the window forward by one word.
          step (fn [[table window] word]
                 [(assoc table window (cons word (get table window)))
                  (concat (rest window) [word])])
          [table final-window] (reduce step
                                       [{} start-window]
                                       (map clean words))]
      (assoc table final-window []))))
(defn split-sentence
  "Converts a string to a collection of fragments, splitting on common
  boundaries: runs of commas, periods, exclamation marks, question
  marks, parentheses or digits, together with any whitespace that
  follows. Empty fragments are dropped."
  [text]
  (filter seq (re-split #"[,.!?()\d]+\s*" text)))
(defn file-chain
  "Creates a Markov chain from the contents of a given file.

  The file is read in full, split into fragments with split-sentence,
  and each fragment is split on whitespace and turned into its own chain
  with keys of `length` words (default 3). The per-fragment chains are
  merged into one map; when a key occurs in several fragments its
  follower words are appended, earlier fragments first."
  ([file]
    (file-chain file 3))
  ([file length]
    (let [;; Append eagerly into a vector so that repeated merges of a
          ;; frequent key do not stack one lazy sequence inside another.
          append-words (fn [known added] (into (vec known) added))
          add-sentence (fn [markov-chain sentence]
                         (merge-with append-words
                                     markov-chain
                                     ;; pass length through; it was
                                     ;; previously dropped here
                                     (chain (re-split #"\s+" sentence)
                                            length)))]
      (reduce add-sentence {} (split-sentence (slurp file))))))
(defn construct-sentence
  "Builds a sentence from a Markov chain structure.

  Given a Markov chain (any size key), a seed (the key to start the
  sentence from; a random key of the chain when nil or omitted) and a
  proc (a function that picks the next word from a key's followers;
  rand-elt when omitted), returns the words joined by single spaces.
  The sentence is extended until it reaches a key with no followers
  (an end of sentence). The non-nil words of the starting key open the
  sentence."
  ([markov-chain]
    (construct-sentence markov-chain nil rand-elt))
  ([markov-chain seed]
    (construct-sentence markov-chain seed rand-elt))
  ([markov-chain seed proc]
    (loop [words (or seed (rand-elt (keys markov-chain)))
           ;; Collect words and join once at the end; joining onto an
           ;; initially empty string would leave a leading space.
           sentence (vec (filter identity words))]
      (let [followers (markov-chain words)]
        (if (seq followers)
          (let [word-new (proc followers)]
            (recur (concat (rest words) [word-new])
                   (conj sentence word-new)))
          (str-join " " sentence))))))
On Apr 24, 12:00 pm, Luke VanderHart <[email protected]>
wrote:
> Cool... I actually did a Markov chain generator myself as one of my
> early Clojure projects. I posted about it at the DC Study group, here:
>
> http://groups.google.com/group/clojure-study-dc/browse_thread/thread/...
>
> It looks like yours is more succinct... I'll definitely have to take
> some time and compare our approaches.
>
> -Luke
>
> On Apr 24, 8:47 am, tmountain <[email protected]> wrote:
>
> > In an effort to learn more about Clojure, I decided to port a markov
> > text generator which a friend wrote in Python. After getting through a
> > few snags, I completed the program and decided to have some fun
> > feeding in some e-books downloaded from the Gutenberg project as
> > input. In this case, I chose Sherlock Holmes and Bram Stoker's Dracula
> > to create a bizarre mashup, which could be called Draclock Holmes or
> > something approximate. I had the program print out three-line snippets
> > of text, and some of the resulting text resembles a sort of absurd
> > poetry. I'd imagine if I let it churn and burn for a few hours, some
> > real gems could emerge.
>
> > acting in her interests
> > Mina's morning and evening hypnotic answer is unvaried
> > with devilish passion
>
> > she succeeded somewhat
> > swiftly and deftly
> > His look is a warning
>
> > together as we swept along
> > found myself lying on my bed trembling all over
> > Miss Stoner and I gazed at him in many tongues
>
> > my power to reward you for your services
> > common subject for conversation
> > throwing open another door
>
> > nine years in England
> > strong-faced old man
> > to mediaeval times
>
> > Here's the code. I'm new to Clojure, so I'm open to suggestions. It's
> > written in a purely functional non-destructive fashion; although, I'm
> > sure a few things could be improved.
>
> > (ns markov
> > (use clojure.contrib.str-utils))
>
> > (defn rand-nth [coll]
> > "return a random element from a collection"
> > (nth (seq coll) (rand-int (count coll))))
>
> > (defn clean [txt]
> > "clean given txt for symbols disruptive to markov chains"
> > (let [new-txt (re-gsub #"[:;,^\"()]" "" txt)
> > new-txt (re-gsub #"'(?!(d|t|ve|m|ll|s|de|re))" "" new-txt)]
> > new-txt))
>
> > (defn chain-lengths [markov-chain]
> > "return a set of lengths for each element in the collection"
> > (let [markov-keys (map keys markov-chain)]
> > (set (for [x markov-keys] (count x)))))
>
> > (defn max-chain-length [markov-chain]
> > "return the length lf the longest chain"
> > (apply max (chain-lengths markov-chain)))
>
> > (defn flatten [x]
> > "Flatten a collection"
> > (let [s? #(instance? clojure.lang.Sequential %)]
> > (filter (complement s?) (tree-seq s? seq x))))
>
> > (defn build-chain [markov-chain keychain words]
> > "Builds a markov chain"
> > (let [first-word (first words)]
> > (if (seq words)
> > (recur (assoc markov-chain keychain
> > (cons first-word (get markov-chain keychain)))
> > (concat (rest keychain) [first-word])
> > (rest words))
> > (assoc markov-chain keychain []))))
>
> > (defn chain
> > "Take a list of words and build a markov chain out of them.
> > The length is the size of the key in number of words."
> > ([words]
> > (chain words 3))
> > ([words length]
> > (build-chain {} (for [x (range length)] nil) (map clean words))))
>
> > (defn split-sentence [text]
> > "Convert a string to a collection on common boundaries"
> > (filter seq (re-split #"[,.!?()\d]+\s*" text)))
>
> > (defn file-chain
> > "Create a markov chain from the contents of a given file"
> > ([file]
> > (file-chain file 3))
> > ([file length]
> > (let [sentences (split-sentence (slurp file))
> > flatten-list (fn [& x] (flatten (list x)))]
> > (loop [markov-chain {} words sentences]
> > (if (seq words)
> > (recur (merge-with flatten-list
> > markov-chain
> > (chain (re-split #"\s+" (first words))))
> > (rest words))
> > markov-chain)))))
>
> > (defn construct-sentence
> > "Build a sentence from a markov chain structure. Given a
> > Markov chain (any size key), Seed (to start the sentence) and
> > Proc (a function for choosing the next word), returns a sentence
> > composed until is reaches the end of a chain (an end of sentence)."
> > ([markov-chain]
> > (construct-sentence markov-chain nil rand-nth))
> > ([markov-chain seed]
> > (construct-sentence markov-chain seed rand-nth))
> > ([markov-chain seed proc]
> > (loop [words (if seed seed (rand-nth (keys markov-chain)))
> > sentence (str-join " " (filter identity words))]
> > (if (seq (markov-chain words))
> > (let [word-new (proc (markov-chain words))]
> > (recur (concat (rest words) [word-new])
> > (str-join " " (into [sentence] [word-new]))))
> > sentence))))
>
> > Example usage:
>
> > (ns main (use markov))
> > (def markov (file-chain "draclock.txt"))
> > (doseq [x (range 100)]
> > (doseq [x (range 3)] (println (construct-sentence markov)))
> > (println))
>
> > Input files:
> > http://www.gutenberg.org/files/345/345.txt - dracula
> > http://www.gutenberg.org/dirs/etext99/advsh12.txt - sherlock holmes
>
> > I just cat them together to make draclock.txt ;-)
>
> > Cheers!
> > Travis
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"Clojure" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en
-~----------~----~----~----~------~----~------~--~---