Compare commits
2 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
8f09d77647 | ||
|
de49bade47 |
3
Makefile
3
Makefile
@ -14,3 +14,6 @@ prepare: ## Download and format csv with domains
|
|||||||
|
|
||||||
run: ## Run checks
|
run: ## Run checks
|
||||||
lein run
|
lein run
|
||||||
|
|
||||||
|
calculate: ## Calculate the percentage
|
||||||
|
# Print the share of rows containing ',true' as a percentage of all rows in the CSV (nothing is printed for an empty file).
awk '/,true/ {hits++} END {if (NR) printf "%.2f\n", 100 * hits / NR}' resources/top-1m.csv
|
||||||
|
@ -3,8 +3,7 @@
|
|||||||
:url "http://example.com/FIXME"
|
:url "http://example.com/FIXME"
|
||||||
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
||||||
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
||||||
:dependencies [[org.clojure/clojure "1.10.0"]
|
:dependencies [[org.clojure/clojure "1.10.1"]
|
||||||
[clj-http "3.10.0"]
|
|
||||||
[org.clojure/data.csv "0.1.4"]]
|
[org.clojure/data.csv "0.1.4"]]
|
||||||
:jvm-opts ["-Xmx1G"]
|
:jvm-opts ["-Xmx1G"]
|
||||||
:main ^:skip-aot wordpress-used.core
|
:main ^:skip-aot wordpress-used.core
|
||||||
|
@ -1,26 +1,14 @@
|
|||||||
(ns wordpress-used.core
|
(ns wordpress-used.core
|
||||||
(:require
|
(:require
|
||||||
[clj-http.client :as client]
|
|
||||||
[clojure.data.csv :as csv]
|
[clojure.data.csv :as csv]
|
||||||
[clojure.java.io :as io]
|
[clojure.java.io :as io]
|
||||||
[clojure.java.shell :as shell]
|
[clojure.java.shell :as shell]
|
||||||
) (:gen-class))
|
) (:gen-class))
|
||||||
|
|
||||||
(def headers {"User-Agent" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0"
|
(defn request
|
||||||
"Accept" "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
|
"Make a request by means of curl"
|
||||||
"Accept-Language" "es,en-US;q=0.7,en;q=0.3"
|
[url]
|
||||||
"Accept-Encoding" "gzip, deflate, br"
|
(shell/sh "curl" "-L" "--max-time" "5" "-H" "User-Agent: Firefox" url))
|
||||||
"DNT" "1"
|
|
||||||
"Connection" "keep-alive"
|
|
||||||
"Upgrade-Insecure-Requests" "1"
|
|
||||||
"Pragma" "no-cache"
|
|
||||||
"Cache-Control" "no-cache"
|
|
||||||
"TE" "Trailers"})
|
|
||||||
(def http-config
|
|
||||||
{:headers headers
|
|
||||||
:ignore-unknown-host? true
|
|
||||||
:connection-timeout 5000
|
|
||||||
:throw-exceptions false})
|
|
||||||
|
|
||||||
(defn read-csv-domains
|
(defn read-csv-domains
|
||||||
"Read CSV file with all domains"
|
"Read CSV file with all domains"
|
||||||
@ -31,11 +19,8 @@
|
|||||||
(defn wordpress?
|
(defn wordpress?
|
||||||
"Check if a web page is generated with WordPress"
|
"Check if a web page is generated with WordPress"
|
||||||
[url]
|
[url]
|
||||||
(try
|
(let [response (request url)]
|
||||||
(let [response (client/get (str "http://" url "/") http-config)]
|
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:out response))])))
|
||||||
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:body response))]))
|
|
||||||
(catch Exception e
|
|
||||||
"timeout")))
|
|
||||||
|
|
||||||
(defn -main
|
(defn -main
|
||||||
[& args]
|
[& args]
|
||||||
|
Loading…
Reference in New Issue
Block a user