First commit
This commit is contained in:
commit
6dd005c1f9
12
.gitignore
vendored
Normal file
12
.gitignore
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
/target
|
||||||
|
/classes
|
||||||
|
/checkouts
|
||||||
|
profiles.clj
|
||||||
|
pom.xml
|
||||||
|
pom.xml.asc
|
||||||
|
*.jar
|
||||||
|
*.class
|
||||||
|
/.lein-*
|
||||||
|
/.nrepl-port
|
||||||
|
.hgignore
|
||||||
|
.hg/
|
11
project.clj
Normal file
11
project.clj
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
;; Leiningen project definition for the wordpress-used command-line tool.
(defproject wordpress-used "0.1.0-SNAPSHOT"
  :description "Checks which domains from a ranked CSV list advertise WordPress in their generator meta tag"
  ;; NOTE(review): still the template placeholder; replace with the real project URL.
  :url "http://example.com/FIXME"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  ;; clj-http performs the HTTP requests; data.csv reads and writes the domain list.
  :dependencies [[org.clojure/clojure "1.10.0"]
                 [clj-http "3.10.0"]
                 [org.clojure/data.csv "0.1.4"]]
  ;; The main namespace is not AOT-compiled outside of uberjar builds.
  :main ^:skip-aot wordpress-used.core
  :repl-options {:init-ns wordpress-used.core})
|
20
resources/top-1m-test.csv
Normal file
20
resources/top-1m-test.csv
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
1,google.com
|
||||||
|
2,youtube.com
|
||||||
|
3,baidu.com
|
||||||
|
4,tmall.com
|
||||||
|
5,qq.com
|
||||||
|
6,taobao.com
|
||||||
|
7,sohu.com
|
||||||
|
8,facebook.com
|
||||||
|
9,wikipedia.org
|
||||||
|
10,yahoo.com
|
||||||
|
11,login.tmall.com
|
||||||
|
12,amazon.com
|
||||||
|
13,360.cn
|
||||||
|
14,jd.com
|
||||||
|
15,weibo.com
|
||||||
|
16,sina.com.cn
|
||||||
|
17,live.com
|
||||||
|
18,reddit.com
|
||||||
|
19,pages.tmall.com
|
||||||
|
20,vk.com
|
|
27
src/wordpress_used/core.clj
Normal file
27
src/wordpress_used/core.clj
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
(ns wordpress-used.core
|
||||||
|
(:require
|
||||||
|
[clj-http.client :as client]
|
||||||
|
[clojure.data.csv :as csv]
|
||||||
|
[clojure.java.io :as io]
|
||||||
|
) (:gen-class))
|
||||||
|
|
||||||
|
(defn wordpress?
  "Return true when the home page of `url` mentions WordPress in a
  generator meta tag, false otherwise.

  `url` is a bare host name such as \"example.com\"; the page is requested
  from \"http://<url>/\".

  Always returns a boolean and never throws for an unreachable site:
  unknown hosts, timeouts, refused connections and responses without a
  textual body all yield false, so a single bad domain cannot abort a
  batch run."
  [url]
  (try
    (let [response (client/get (str "http://" url "/")
                               {:ignore-unknown-host? true
                                :connection-timeout   5000
                                ;; Bound the read as well, otherwise a host that accepts
                                ;; the connection but never answers blocks forever.
                                :socket-timeout       5000
                                ;; Only suppresses exceptions for HTTP error statuses;
                                ;; transport failures are handled by the catch below.
                                :throw-exceptions     false})
          ;; `response` is nil when the host is unknown, so `body` may be nil.
          body (:body response)]
      (boolean (and (string? body)
                    (re-find #"meta.*generator.*WordPress" body))))
    (catch Exception _
      ;; Connection refused, timeout, TLS or protocol errors: treat the site
      ;; as not detectably running WordPress.
      false)))
|
||||||
|
|
||||||
|
|
||||||
|
(defn -main
  "Entry point: annotate every row of the classpath resource
  \"top-1m-test.csv\" with a WordPress flag and write the rows back to
  the same resource.

  Each row is expected to hold the domain in its second column; the
  result of `wordpress?` for that domain is appended as a new trailing
  column. Rows without a second column are written back unchanged.

  `args` is accepted for the command-line contract but is not used.
  Returns nil. Throws IllegalArgumentException when the resource cannot
  be found on the classpath."
  [& args]
  (let [resource (io/resource "top-1m-test.csv")]
    (when (nil? resource)
      (throw (IllegalArgumentException.
              "Resource top-1m-test.csv not found on the classpath")))
    (let [;; Read CSV with all domains. The rows are fully realized and the
          ;; reader is closed before the same file is reopened for writing.
          domains (with-open [reader (io/reader resource)]
                    (doall (csv/read-csv reader)))
          ;; Check is WordPress (eager, so every request finishes before
          ;; the output file is truncated).
          domains-with-wordpress (mapv (fn [row]
                                         (if-let [domain (get row 1)]
                                           (conj row (wordpress? domain))
                                           row))
                                       domains)]
      ;; Save CSV.
      ;; NOTE(review): writing to a resource URL presumably only works while
      ;; the resource is a plain file on disk (not packaged in a jar) - confirm.
      (with-open [writer (io/writer resource)]
        (csv/write-csv writer domains-with-wordpress)))))
|
7
test/wordpress_used/core_test.clj
Normal file
7
test/wordpress_used/core_test.clj
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
(ns wordpress-used.core-test
|
||||||
|
(:require [clojure.test :refer :all]
|
||||||
|
[wordpress-used.core :refer :all]))
|
||||||
|
|
||||||
|
;; Offline tests for `wordpress?`: the HTTP call is stubbed with
;; `with-redefs`, so no network access is needed. The clj-http.client
;; namespace is already loaded because wordpress-used.core requires it.
(deftest a-test
  (testing "a page with a WordPress generator meta tag is detected"
    (with-redefs [clj-http.client/get
                  (fn [& _]
                    {:status 200
                     :body "<meta name=\"generator\" content=\"WordPress 5.2\" />"})]
      (is (true? (wordpress? "example.com")))))
  (testing "a page without a generator meta tag is not detected"
    (with-redefs [clj-http.client/get
                  (fn [& _]
                    {:status 200
                     :body "<html><head></head><body></body></html>"})]
      (is (false? (wordpress? "example.com"))))))
|
Loading…
Reference in New Issue
Block a user