Update line

This commit is contained in:
Andros Fenollosa 2019-10-10 00:37:22 +02:00
parent 16f6b94f3e
commit 5b0ce23c61
4 changed files with 27 additions and 9 deletions

13
Makefile Normal file
View File

@ -0,0 +1,13 @@
.DEFAULT_GOAL := help

# Print every documented target: any rule header in the parsed makefiles that
# is followed by a "## " description. Output is sorted by target name and
# formatted as two columns (target in cyan, then its description).
# Declared phony so that a file named "help" can never mask the target.
# Note: "$$&" and "$$}" hand a literal "$" to Perl; a bare "$&" would be
# expanded by make itself as a reference to an (undefined) variable named "&".
.PHONY: help
help: ## Show this help
	@perl -nle'print $$& if m{^[a-zA-Z_-]+:.*?## .*$$}' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-25s\033[0m %s\n", $$1, $$2}'
# Download the top-1m domain list, append a ",nil" column to every row
# (the marker for "not checked yet") and store the result as
# resources/top-1m.csv. Temporary files in the working directory are removed.
# Declared phony because the target name is a command, not a file it creates.
.PHONY: prepare
prepare: ## Download and format csv with domains
# Start from a clean slate: a stale zip would make wget save to
# "top-1m.csv.zip.1", and a stale csv would make unzip prompt for overwrite.
	rm -f top-1m.csv top-1m.csv.zip
	wget http://s3.amazonaws.com/alexa-static/top-1m.csv.zip
	unzip top-1m.csv.zip
	rm top-1m.csv.zip
	mkdir -p resources
# Write the augmented rows straight to the destination; reading and writing
# the same file in one pipeline would truncate or corrupt it.
	sed 's/$$/,nil/' top-1m.csv > resources/top-1m.csv
	rm top-1m.csv

8
README.md Normal file
View File

@ -0,0 +1,8 @@
## Prepare
make prepare
## Run
lein run

View File

@ -3,6 +3,7 @@
[clj-http.client :as client]
[clojure.data.csv :as csv]
[clojure.java.io :as io]
[clojure.java.shell :refer [sh]]
) (:gen-class))
(defn read-csv-domains
@ -31,12 +32,12 @@
domains-csv (vec (read-csv-domains file-csv))
;; Filters leaving those that have not been checked
domains-unchecked (filter #(= (get % 2) "nil") domains-csv)]
(def mod-domains-csv domains-csv)
;; List with domains with a boolean indicating if it is generate or not in WordPress
(doseq [domain-data domains-unchecked] (let [domain (get domain-data 1)
(doseq [domain-data domains-unchecked] (let [line (get domain-data 0)
domain (get domain-data 1)
;; Check if domain it is generate or not in WordPress
check-wordpress (wordpress? domain)]
;; Edit domains-csv with check WordPress
(def mod-domains-csv (map #(-> (if (= domain (get % 1)) (assoc % 2 (str check-wordpress)) %)) mod-domains-csv))
;; Save domains to CSV
(save-csv-domains file-csv mod-domains-csv)))))
(prn (str line " " domain " " check-wordpress))
(prn (sh "sed" "-i" "1s/b/o/g" (str "resources/" file-csv)))))))
;; (prn (sh "sed" "-i" (str "'" line "s/.*/" line "," domain "," check-wordpress "/g'") (str "resources/" file-csv)))))))

View File

@ -1,4 +0,0 @@
1,google.com
2,youtube.com
4,tmall.com
5,idecrea.es
1 1 google.com
2 2 youtube.com
3 4 tmall.com
4 5 idecrea.es