commit 6dd005c1f9ac0a1af3314c1dd5a0e5de244adc83
Author: Andros Fenollosa
Date:   Mon Oct 7 19:35:09 2019 +0200

    First commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d18f225
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+/target
+/classes
+/checkouts
+profiles.clj
+pom.xml
+pom.xml.asc
+*.jar
+*.class
+/.lein-*
+/.nrepl-port
+.hgignore
+.hg/
diff --git a/project.clj b/project.clj
new file mode 100644
index 0000000..8b35619
--- /dev/null
+++ b/project.clj
@@ -0,0 +1,11 @@
+(defproject wordpress-used "0.1.0-SNAPSHOT"
+  :description "FIXME: write description"
+  :url "http://example.com/FIXME"
+  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
+            :url "https://www.eclipse.org/legal/epl-2.0/"}
+  :dependencies [[org.clojure/clojure "1.10.0"]
+                 [clj-http "3.10.0"]
+                 [org.clojure/data.csv "0.1.4"]
+                 ]
+  :main ^:skip-aot wordpress-used.core
+  :repl-options {:init-ns wordpress-used.core})
diff --git a/resources/top-1m-test.csv b/resources/top-1m-test.csv
new file mode 100644
index 0000000..e546aea
--- /dev/null
+++ b/resources/top-1m-test.csv
@@ -0,0 +1,20 @@
+1,google.com
+2,youtube.com
+3,baidu.com
+4,tmall.com
+5,qq.com
+6,taobao.com
+7,sohu.com
+8,facebook.com
+9,wikipedia.org
+10,yahoo.com
+11,login.tmall.com
+12,amazon.com
+13,360.cn
+14,jd.com
+15,weibo.com
+16,sina.com.cn
+17,live.com
+18,reddit.com
+19,pages.tmall.com
+20,vk.com
diff --git a/src/wordpress_used/core.clj b/src/wordpress_used/core.clj
new file mode 100644
index 0000000..be513dc
--- /dev/null
+++ b/src/wordpress_used/core.clj
@@ -0,0 +1,51 @@
+(ns wordpress-used.core
+  (:require
+   [clj-http.client :as client]
+   [clojure.data.csv :as csv]
+   [clojure.java.io :as io])
+  (:gen-class))
+
+(def ^:private generator-pattern
+  "Matches `meta ... generator ... WordPress` within a single line of HTML."
+  #"meta.*generator.*WordPress")
+
+(defn wordpress-generator?
+  "Returns true when the HTML string `body` contains a meta generator tag
+  mentioning WordPress, false otherwise (including when `body` is nil)."
+  [body]
+  (boolean (and (string? body)
+                (re-find generator-pattern body))))
+
+(defn wordpress?
+  "Check whether the site at `url` (a bare domain, e.g. \"example.com\")
+  uses WordPress, judging by its meta generator tag.
+
+  Returns true or false. Unknown hosts and connection failures count as
+  false instead of aborting the caller."
+  [url]
+  (try
+    (let [response (client/get (str "http://" url "/")
+                               {:ignore-unknown-host? true
+                                :connection-timeout 5000
+                                :throw-exceptions false})]
+      ;; :ignore-unknown-host? makes clj-http return nil for unresolvable
+      ;; domains, so the body may be nil here.
+      (wordpress-generator? (:body response)))
+    (catch java.io.IOException _
+      ;; Refused connections and timeouts are not covered by
+      ;; :throw-exceptions false, which only applies to HTTP statuses.
+      false)))
+
+(defn -main
+  "Reads the bundled top-1m-test.csv (rank,domain rows), appends the
+  `wordpress?` result for each domain as an extra column and writes the
+  rows back to the same resource file."
+  [& args]
+  (let [csv-file (io/resource "top-1m-test.csv")
+        ;; Read and check every row eagerly so the reader is closed
+        ;; before the same file is reopened for writing.
+        rows     (with-open [reader (io/reader csv-file)]
+                   (mapv #(conj % (wordpress? (get % 1)))
+                         (csv/read-csv reader)))]
+    (with-open [writer (io/writer csv-file)]
+      (csv/write-csv writer rows))))
diff --git a/test/wordpress_used/core_test.clj b/test/wordpress_used/core_test.clj
new file mode 100644
index 0000000..6e7c1c0
--- /dev/null
+++ b/test/wordpress_used/core_test.clj
@@ -0,0 +1,11 @@
+(ns wordpress-used.core-test
+  (:require [clojure.test :refer :all]
+            [wordpress-used.core :refer :all]))
+
+(deftest wordpress-generator?-test
+  (testing "detects the WordPress meta generator tag"
+    (is (true? (wordpress-generator?
+                "<meta name=\"generator\" content=\"WordPress 5.2.3\" />"))))
+  (testing "returns false for other pages and for a missing body"
+    (is (false? (wordpress-generator? "<html><body>Hello</body></html>")))
+    (is (false? (wordpress-generator? nil)))))