diff --git a/Makefile b/Makefile
index faa71f1..2d5d551 100644
--- a/Makefile
+++ b/Makefile
@@ -14,3 +14,6 @@ prepare: ## Download and format csv with domains
 
 run: ## Run checks
 	lein run
+
+calculate: ## Calculate the percentage of checked domains flagged as WordPress
+	awk '/,true/ { hits++ } END { if (NR > 0) printf "%.2f%%\n", 100 * hits / NR }' resources/top-1m.csv
diff --git a/project.clj b/project.clj
index 51dd63f..b907c97 100644
--- a/project.clj
+++ b/project.clj
@@ -3,8 +3,7 @@
   :url "http://example.com/FIXME"
   :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
             :url "https://www.eclipse.org/legal/epl-2.0/"}
-  :dependencies [[org.clojure/clojure "1.10.0"]
-                 [clj-http "3.10.0"]
+  :dependencies [[org.clojure/clojure "1.10.1"]
                  [org.clojure/data.csv "0.1.4"]]
   :jvm-opts ["-Xmx1G"]
   :main ^:skip-aot wordpress-used.core
diff --git a/src/wordpress_used/core.clj b/src/wordpress_used/core.clj
index e5541e8..7eb06f7 100644
--- a/src/wordpress_used/core.clj
+++ b/src/wordpress_used/core.clj
@@ -1,26 +1,28 @@
 (ns wordpress-used.core
   (:require
-   [clj-http.client :as client]
    [clojure.data.csv :as csv]
    [clojure.java.io :as io]
    [clojure.java.shell :as shell]
    )
   (:gen-class))
-(def headers {"User-Agent" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0"
-              "Accept" "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
-              "Accept-Language" "es,en-US;q=0.7,en;q=0.3"
-              "Accept-Encoding" "gzip, deflate, br"
-              "DNT" "1"
-              "Connection" "keep-alive"
-              "Upgrade-Insecure-Requests" "1"
-              "Pragma" "no-cache"
-              "Cache-Control" "no-cache"
-              "TE" "Trailers"})
-(def http-config
-  {:headers headers
-   :ignore-unknown-host? true
-   :connection-timeout 5000
-   :throw-exceptions false})
+(defn request
+  "Fetch `url` with curl and return the clojure.java.shell/sh result map
+  (:exit, :out, :err). Redirects are followed; the transfer is silent and
+  time-bounded so one unresponsive host cannot stall the whole run."
+  [url]
+  (shell/sh "curl" "--silent" "--location"
+            ;; 5 s matches the :connection-timeout of the removed clj-http config.
+            "--connect-timeout" "5"
+            "--max-time" "30"
+            "-H" "User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0"
+            "-H" "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
+            "-H" "Accept-Language: es,en-US;q=0.7,en;q=0.3"
+            "-H" "DNT: 1"
+            "-H" "Connection: keep-alive"
+            "-H" "Upgrade-Insecure-Requests: 1"
+            "-H" "Pragma: no-cache"
+            "-H" "Cache-Control: no-cache"
+            url))
 
 (defn read-csv-domains
   "Read CSV file with all domains"
@@ -31,11 +33,10 @@
 (defn wordpress?
   "Check if a web page is generated with WordPress"
   [url]
-  (try
-    (let [response (client/get (str "http://" url "/") http-config)]
-      (every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:body response))]))
-    (catch Exception e
-      "timeout")))
+  ;; Explicit scheme: without one curl guesses the protocol from the host
+  ;; name (e.g. "ftp.example.com" would be fetched over FTP).
+  (let [response (request (str "http://" url "/"))]
+    (boolean (re-find #"meta.*generator.*WordPress" (:out response)))))
 
 (defn -main
   [& args]