First commit

This commit is contained in:
Andros Fenollosa 2019-10-07 19:35:09 +02:00
commit 6dd005c1f9
5 changed files with 77 additions and 0 deletions

12
.gitignore vendored Normal file
View File

@ -0,0 +1,12 @@
/target
/classes
/checkouts
profiles.clj
pom.xml
pom.xml.asc
*.jar
*.class
/.lein-*
/.nrepl-port
.hgignore
.hg/

11
project.clj Normal file
View File

@ -0,0 +1,11 @@
;; Leiningen build definition for the wordpress-used project.
(defproject wordpress-used "0.1.0-SNAPSHOT"
  :description "FIXME: write description"
  :url "http://example.com/FIXME"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url  "https://www.eclipse.org/legal/epl-2.0/"}
  ;; clj-http performs the HTTP requests and data.csv reads/writes the
  ;; domain list; both are required by wordpress-used.core.
  :dependencies [[org.clojure/clojure "1.10.0"]
                 [clj-http "3.10.0"]
                 [org.clojure/data.csv "0.1.4"]]
  ;; Namespace holding -main; the REPL starts in the same namespace.
  :main ^:skip-aot wordpress-used.core
  :repl-options {:init-ns wordpress-used.core})

20
resources/top-1m-test.csv Normal file
View File

@ -0,0 +1,20 @@
1,google.com
2,youtube.com
3,baidu.com
4,tmall.com
5,qq.com
6,taobao.com
7,sohu.com
8,facebook.com
9,wikipedia.org
10,yahoo.com
11,login.tmall.com
12,amazon.com
13,360.cn
14,jd.com
15,weibo.com
16,sina.com.cn
17,live.com
18,reddit.com
19,pages.tmall.com
20,vk.com
1 1 google.com
2 2 youtube.com
3 3 baidu.com
4 4 tmall.com
5 5 qq.com
6 6 taobao.com
7 7 sohu.com
8 8 facebook.com
9 9 wikipedia.org
10 10 yahoo.com
11 11 login.tmall.com
12 12 amazon.com
13 13 360.cn
14 14 jd.com
15 15 weibo.com
16 16 sina.com.cn
17 17 live.com
18 18 reddit.com
19 19 pages.tmall.com
20 20 vk.com

View File

@ -0,0 +1,27 @@
(ns wordpress-used.core
(:require
[clj-http.client :as client]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
) (:gen-class))
(defn wordpress?
  "Return true when the home page of `url` carries a WordPress
  `meta ... generator` tag, false otherwise.

  `url` is a bare host name (e.g. \"example.com\"); the page is requested
  from http://<url>/ with a 5 second connection timeout.

  Hosts that cannot be resolved, connections that fail or time out, and
  responses without a body all yield false instead of throwing, so one
  unreachable domain does not abort a batch run."
  [url]
  (try
    (let [response (client/get (str "http://" url "/")
                               {:ignore-unknown-host? true
                                :connection-timeout   5000
                                :throw-exceptions     false})
          ;; `response` is nil when the host is unknown (see
          ;; :ignore-unknown-host?), which makes `body` nil as well.
          body     (:body response)]
      ;; re-find throws on a nil input, so only search when a body exists.
      (boolean (when body
                 (re-find #"meta.*generator.*WordPress" body))))
    ;; :throw-exceptions false only suppresses HTTP status errors; refused
    ;; connections and timeouts still surface as IOException subclasses.
    (catch java.io.IOException _
      false)))
(defn -main
  "Program entry point.

  Reads every row of the classpath resource top-1m-test.csv, appends the
  result of `wordpress?` for the domain in the second column to each row,
  and writes the extended rows back to the same resource. Command-line
  `args` are accepted but not used. Returns nil."
  [& args]
  (let [csv-file (io/resource "top-1m-test.csv")
        ;; Phase 1: read and check every domain. mapv is eager, so all rows
        ;; are realised before the reader is closed.
        rows     (with-open [reader (io/reader csv-file)]
                   (mapv #(conj % (wordpress? (get % 1)))
                         (csv/read-csv reader)))]
    ;; Phase 2: overwrite the file only after the reader has been closed,
    ;; so the input is never truncated while it is still being read.
    (with-open [writer (io/writer csv-file)]
      (csv/write-csv writer rows))))

View File

@ -0,0 +1,7 @@
(ns wordpress-used.core-test
(:require [clojure.test :refer :all]
[wordpress-used.core :refer :all]))
;; Replaces the always-failing template placeholder with a real check of
;; `wordpress?`. The HTTP call is stubbed, so the test needs no network.
(deftest a-test
  (testing "wordpress? detects a WordPress generator meta tag"
    (with-redefs [clj-http.client/get
                  (fn [& _]
                    {:status 200
                     :body   "<meta name=\"generator\" content=\"WordPress 5.2.3\" />"})]
      (is (true? (wordpress? "example.com")))))
  (testing "wordpress? is false for a page without the generator tag"
    (with-redefs [clj-http.client/get
                  (fn [& _]
                    {:status 200
                     :body   "<html><head><title>Plain</title></head></html>"})]
      (is (false? (wordpress? "example.com"))))))