Update project and core
This commit is contained in:
parent
6dd005c1f9
commit
612e713c33
@ -1,11 +1,10 @@
|
|||||||
(defproject wordpress-used "0.1.0-SNAPSHOT"
|
(defproject wordpress-used "1.0.0-SNAPSHOT"
|
||||||
:description "FIXME: write description"
|
:description "Calculates WordPress usage index from a CSV list of domains"
|
||||||
:url "http://example.com/FIXME"
|
:url "http://example.com/FIXME"
|
||||||
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
||||||
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
||||||
:dependencies [[org.clojure/clojure "1.10.0"]
|
:dependencies [[org.clojure/clojure "1.10.0"]
|
||||||
[clj-http "3.10.0"]
|
[clj-http "3.10.0"]
|
||||||
[org.clojure/data.csv "0.1.4"]
|
[org.clojure/data.csv "0.1.4"]]
|
||||||
]
|
|
||||||
:main ^:skip-aot wordpress-used.core
|
:main ^:skip-aot wordpress-used.core
|
||||||
:repl-options {:init-ns wordpress-used.core})
|
:repl-options {:init-ns wordpress-used.core})
|
||||||
|
@ -1,20 +1,4 @@
|
|||||||
1,google.com
|
1,google.com
|
||||||
2,youtube.com
|
2,youtube.com
|
||||||
3,baidu.com
|
|
||||||
4,tmall.com
|
4,tmall.com
|
||||||
5,qq.com
|
5,idecrea.es
|
||||||
6,taobao.com
|
|
||||||
7,sohu.com
|
|
||||||
8,facebook.com
|
|
||||||
9,wikipedia.org
|
|
||||||
10,yahoo.com
|
|
||||||
11,login.tmall.com
|
|
||||||
12,amazon.com
|
|
||||||
13,360.cn
|
|
||||||
14,jd.com
|
|
||||||
15,weibo.com
|
|
||||||
16,sina.com.cn
|
|
||||||
17,live.com
|
|
||||||
18,reddit.com
|
|
||||||
19,pages.tmall.com
|
|
||||||
20,vk.com
|
|
|
@ -5,23 +5,45 @@
|
|||||||
[clojure.java.io :as io]
|
[clojure.java.io :as io]
|
||||||
) (:gen-class))
|
) (:gen-class))
|
||||||
|
|
||||||
|
(defn read-csv-domains
|
||||||
|
"Read CSV file with all domains"
|
||||||
|
[url]
|
||||||
|
(with-open [reader (io/reader (io/resource url))]
|
||||||
|
(doall (csv/read-csv reader))))
|
||||||
|
|
||||||
|
(defn save-csv-domains
|
||||||
|
"Save the list with the domains in a CSV file"
|
||||||
|
[url new-domains]
|
||||||
|
(with-open [writer (io/writer url)]
|
||||||
|
(csv/write-csv writer new-domains)))
|
||||||
|
|
||||||
(defn wordpress?
|
(defn wordpress?
|
||||||
"Check site used WordPress with meta generator"
|
"Check if a web page is generated with WordPress"
|
||||||
[url]
|
[url]
|
||||||
(let [response (client/get (str "http://" url "/") {:ignore-unknown-host? true, :connection-timeout 5000, :throw-exceptions false})]
|
(let [response (client/get (str "http://" url "/") {:ignore-unknown-host? true, :connection-timeout 5000, :throw-exceptions false})]
|
||||||
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:body response))])))
|
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:body response))])))
|
||||||
|
|
||||||
|
|
||||||
(defn -main
|
(defn -main
|
||||||
[& args]
|
[& args]
|
||||||
;; Read CSV with all domains
|
(let [;; Name of the file containing the CSV with the domains
|
||||||
(with-open [reader (io/reader (clojure.java.io/resource "top-1m-test.csv"))]
|
file-csv "top-1m-test.csv"
|
||||||
(doall
|
;; List with domains
|
||||||
(let [domains (csv/read-csv reader)
|
domains (read-csv-domains file-csv)
|
||||||
;; Check is WordPress
|
;; Domain rows, each extended with a boolean indicating whether the site is generated with WordPress
|
||||||
domains-with-wordpress (doall (map #(conj % (wordpress? (get % 1))) domains))]
|
domains-checks (doall (vec (map #(conj % (wordpress? (get % 1))) domains)))]
|
||||||
;;domains-with-wordpress (map #(conj % (wordpress? (get % 1))) domains)]
|
;; Save domains to CSV
|
||||||
;; Save CSV
|
(save-csv-domains file-csv domains-checks)))
|
||||||
(with-open [writer (io/writer (clojure.java.io/resource "top-1m-test.csv"))]
|
|
||||||
(csv/write-csv writer (vec domains-with-wordpress)))
|
;; (defn -main
|
||||||
))))
|
;; [& args]
|
||||||
|
;; ;; Read CSV with all domains
|
||||||
|
;; (with-open [reader (io/reader (clojure.java.io/resource "top-1m-test.csv"))]
|
||||||
|
;; (doall
|
||||||
|
;; (let [domains (csv/read-csv reader)
|
||||||
|
;; ;; Check is WordPress
|
||||||
|
;; domains-with-wordpress (vec (map #(conj % (wordpress? (get % 1))) domains))]
|
||||||
|
|
||||||
|
;; ;; Save CSV
|
||||||
|
;; (with-open [writer (io/writer (clojure.java.io/resource "top-1m-test.csv"))]
|
||||||
|
;; (csv/write-csv writer (doall domains-with-wordpress)))
|
||||||
|
;; ))))
|
||||||
|
4
top-1m-test.csv
Normal file
4
top-1m-test.csv
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
1,google.com,false
|
||||||
|
2,youtube.com,false
|
||||||
|
4,tmall.com,false
|
||||||
|
5,idecrea.es,true
|
|
Loading…
Reference in New Issue
Block a user