Compare commits

...

2 Commits
master ... curl

Author SHA1 Message Date
Andros Fenollosa
8f09d77647 Add max time 2019-10-13 19:31:18 +02:00
Andros Fenollosa
de49bade47 Update request to curl 2019-10-13 19:01:22 +02:00
3 changed files with 10 additions and 23 deletions

View File

@ -14,3 +14,6 @@ prepare: ## Download and format csv with domains
# Start the application through Leiningen (`lein run`).
run: ## Run checks
lein run
# Print the share of rows in resources/top-1m.csv that contain ",true",
# as a percentage with two decimals.
# The counting and the division happen in a single awk pass: the shell cannot
# divide the output of two commands with a bare `/`, and awk's NR already
# holds the total row count when the END block runs.
# An empty file prints "no rows" instead of dividing by zero.
calculate: ## Calculate the percentage
	awk '/,true/ { hits++ } END { if (NR) printf "%.2f%%\n", 100 * hits / NR; else print "no rows" }' resources/top-1m.csv

View File

@ -3,8 +3,7 @@
:url "http://example.com/FIXME"
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
:url "https://www.eclipse.org/legal/epl-2.0/"}
:dependencies [[org.clojure/clojure "1.10.0"]
[clj-http "3.10.0"]
:dependencies [[org.clojure/clojure "1.10.1"]
[org.clojure/data.csv "0.1.4"]]
:jvm-opts ["-Xmx1G"]
:main ^:skip-aot wordpress-used.core

View File

@ -1,26 +1,14 @@
(ns wordpress-used.core
(:require
[clj-http.client :as client]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
[clojure.java.shell :as shell]
) (:gen-class))
;; HTTP request headers that imitate a desktop Firefox 69 browser.
;; Used as the :headers entry of http-config.
;; NOTE(review): "Accept-Encoding" advertises "br" (Brotli); confirm the HTTP
;; client can actually decode Brotli responses, otherwise bodies may be unreadable.
(def headers {"User-Agent" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0"
"Accept" "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
"Accept-Language" "es,en-US;q=0.7,en;q=0.3"
"Accept-Encoding" "gzip, deflate, br"
"DNT" "1"
"Connection" "keep-alive"
"Upgrade-Insecure-Requests" "1"
"Pragma" "no-cache"
"Cache-Control" "no-cache"
"TE" "Trailers"})
;; Option map passed as the second argument to client/get.
;; NOTE(review): :connection-timeout is presumably in milliseconds (5 s) - confirm
;; against the clj-http docs. Only a connection timeout is set here; no
;; socket/read timeout appears in this map.
;; NOTE(review): :ignore-unknown-host? and :throw-exceptions presumably make the
;; client return a response instead of throwing for unknown hosts and error
;; statuses - confirm against the clj-http docs.
(def http-config
{:headers headers
:ignore-unknown-host? true
:connection-timeout 5000
:throw-exceptions false})
(defn request
  "Fetch `url` by running the external `curl` program.

  curl is told to follow redirects (-L), to give up after 5 seconds in total
  (--max-time 5) and to send the header `User-Agent: Firefox`.
  Returns the value produced by `shell/sh` unchanged."
  [url]
  (let [curl-args ["curl"
                   "-L"
                   "--max-time" "5"
                   "-H" "User-Agent: Firefox"
                   url]]
    ;; Arguments are passed as separate strings to shell/sh, as in the
    ;; original call, rather than being joined into one command line.
    (apply shell/sh curl-args)))
(defn read-csv-domains
"Read CSV file with all domains"
@ -31,11 +19,8 @@
;; Decide whether the page served for `url` looks like a WordPress site by
;; searching the fetched HTML for a "generator" meta tag that names WordPress.
;; `.` in the pattern does not match line terminators, so "meta", "generator"
;; and "WordPress" must all appear on the same line of the response.
;; NOTE(review): this span is taken from a rendered diff without +/- markers, so
;; the clj-http variant (try/catch) and the curl variant (request) both appear
;; below - presumably only one of them exists in any real revision; confirm.
(defn wordpress?
"Check if a web page is generated with WordPress"
[url]
(try
;; clj-http variant: GET http://<url>/ with http-config and search the :body.
(let [response (client/get (str "http://" url "/") http-config)]
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:body response))]))
(catch Exception e
;; Any exception yields the string "timeout", which is truthy in Clojure, so a
;; caller testing the result as a boolean would treat a failure as a match.
"timeout")))
;; curl variant: search the :out entry of whatever `request` returns.
(let [response (request url)]
;; every? over a one-element vector: true when re-find matched, false when it returned nil.
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:out response))])))
(defn -main
[& args]