Add the is-wordpress repo as a git submodule

This commit is contained in:
Andros Fenollosa 2019-12-04 19:10:34 +01:00
parent 91584d565f
commit e6eef9b5e9
4 changed files with 8 additions and 9 deletions

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "is-wordpress"]
path = is-wordpress
url = git@github.com:tanrax/is-wordpress.git

View File

@@ -5,4 +5,4 @@ NUM_NOT_WORDPRESS=$(cat $CSV | grep ,false$ | wc -l)
NUM_TIMEOUT=$(cat $CSV | grep ,timeout$ | wc -l) NUM_TIMEOUT=$(cat $CSV | grep ,timeout$ | wc -l)
TOTAL=$(cat $CSV | wc -l) TOTAL=$(cat $CSV | wc -l)
echo "($NUM_WORDPRESS * 100) / ($TOTAL - $NUM_TIMEOUT)" | bc echo "($NUM_WORDPRESS * 100) / ($TOTAL - $NUM_TIMEOUT)" | bc | awk '{print $1"%"}'

1
is-wordpress Submodule

@@ -0,0 +1 @@
Subproject commit b64770eb63dcb3924957bed824e79d380089e2c2

View File

@@ -5,10 +5,10 @@
[clojure.java.shell :as shell] [clojure.java.shell :as shell]
) (:gen-class)) ) (:gen-class))
(defn request (defn wordpress?
"Make a request by means of curl" "Check if a web page is generated with WordPress"
[url] [url]
(shell/sh "curl" "-L" "-m" "5" "-H" "User-Agent: Firefox" url)) (= (clojure.string/trim-newline (:out (shell/sh "bash" "./is-wordpress/is-wordpress" url))) "true"))
(defn read-csv-domains (defn read-csv-domains
@@ -17,11 +17,6 @@
(with-open [reader (io/reader (io/resource url))] (with-open [reader (io/reader (io/resource url))]
(doall (csv/read-csv reader)))) (doall (csv/read-csv reader))))
(defn wordpress?
"Check if a web page is generated with WordPress"
[url]
(let [response (request url)]
(every? identity [(re-find (re-pattern "meta.*generator.*WordPress") (:out response))])))
(defn -main (defn -main
[& args] [& args]