Update request to curl

This commit is contained in:
Andros Fenollosa 2019-10-13 19:01:22 +02:00
parent 3275226ab5
commit de49bade47
3 changed files with 10 additions and 23 deletions

View File

@ -14,3 +14,6 @@ prepare: ## Download and format csv with domains
run: ## Run checks run: ## Run checks
lein run lein run
# Print the share of rows in resources/top-1m.csv that contain ",true",
# as a percentage of all rows (matching rows / total rows * 100).
# NOTE(review): presumably ",true" marks a domain detected as WordPress --
# confirm against the code that writes the CSV.
# A single awk pass replaces the previous "(awk ...) / (cat ... | wc -l)",
# which is not valid shell (subshells cannot be divided) and had the
# ratio inverted.
calculate: ## Calculate the percentage
	awk '/,true/ { hits++ } END { if (NR > 0) printf "%.2f%%\n", 100 * hits / NR; else print "0.00%" }' resources/top-1m.csv

View File

@ -3,8 +3,7 @@
:url "http://example.com/FIXME" :url "http://example.com/FIXME"
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
:url "https://www.eclipse.org/legal/epl-2.0/"} :url "https://www.eclipse.org/legal/epl-2.0/"}
:dependencies [[org.clojure/clojure "1.10.0"] :dependencies [[org.clojure/clojure "1.10.1"]
[clj-http "3.10.0"]
[org.clojure/data.csv "0.1.4"]] [org.clojure/data.csv "0.1.4"]]
:jvm-opts ["-Xmx1G"] :jvm-opts ["-Xmx1G"]
:main ^:skip-aot wordpress-used.core :main ^:skip-aot wordpress-used.core

View File

@ -1,26 +1,14 @@
(ns wordpress-used.core (ns wordpress-used.core
(:require (:require
[clj-http.client :as client]
[clojure.data.csv :as csv] [clojure.data.csv :as csv]
[clojure.java.io :as io] [clojure.java.io :as io]
[clojure.java.shell :as shell] [clojure.java.shell :as shell]
) (:gen-class)) ) (:gen-class))
(def headers {"User-Agent" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0" (defn request
"Accept" "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" "Make a request by means of curl"
"Accept-Language" "es,en-US;q=0.7,en;q=0.3" [url]
"Accept-Encoding" "gzip, deflate, br" (shell/sh "curl" "-L" "-H" "User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0" "-H" "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" "-H" "Accept-Language: es,en-US;q=0.7,en;q=0.3" "-H" "DNT: 1" "-H" "Connection: keep-alive" "-H" "Upgrade-Insecure-Requests: 1" "-H" "Pragma: no-cache" "-H" "Cache-Control: no-cache" url))
"DNT" "1"
"Connection" "keep-alive"
"Upgrade-Insecure-Requests" "1"
"Pragma" "no-cache"
"Cache-Control" "no-cache"
"TE" "Trailers"})
(def http-config
{:headers headers
:ignore-unknown-host? true
:connection-timeout 5000
:throw-exceptions false})
(defn read-csv-domains (defn read-csv-domains
"Read CSV file with all domains" "Read CSV file with all domains"
@ -31,11 +19,8 @@
(defn wordpress? (defn wordpress?
"Check if a web page is generated with WordPress" "Check if a web page is generated with WordPress"
[url] [url]
(try (let [response (request url)]
(let [response (client/get (str "http://" url "/") http-config)] (every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:out response))])))
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:body response))]))
(catch Exception e
"timeout")))
(defn -main (defn -main
[& args] [& args]