From 5b0ce23c61d6d4fb960616a18d2878ea9623a148 Mon Sep 17 00:00:00 2001 From: Andros Fenollosa Date: Thu, 10 Oct 2019 00:37:22 +0200 Subject: [PATCH] Update line --- Makefile | 13 +++++++++++++ README.md | 8 ++++++++ src/wordpress_used/core.clj | 11 ++++++----- top-1m-test.csv | 4 ---- 4 files changed, 27 insertions(+), 9 deletions(-) create mode 100644 Makefile create mode 100644 README.md delete mode 100644 top-1m-test.csv diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..abc23a5 --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +.DEFAULT_GOAL := help +help: + @perl -nle'print $& if m{^[a-zA-Z_-]+:.*?## .*$$}' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-25s\033[0m %s\n", $$1, $$2}' + +prepare: ## Download and format csv with domains + touch top-1m.csv + rm top-1m.csv + wget http://s3.amazonaws.com/alexa-static/top-1m.csv.zip + unzip top-1m.csv.zip + rm top-1m.csv.zip + cat top-1m.csv | sed -n 's/$$/,nil/p' | >> top-1m.csv + mv top-1m.csv resources + rm top-1m.csv diff --git a/README.md b/README.md new file mode 100644 index 0000000..bbb1918 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +## Prepare + +make + + +## Run + +lein run diff --git a/src/wordpress_used/core.clj b/src/wordpress_used/core.clj index ab7ad45..069d022 100644 --- a/src/wordpress_used/core.clj +++ b/src/wordpress_used/core.clj @@ -3,6 +3,7 @@ [clj-http.client :as client] [clojure.data.csv :as csv] [clojure.java.io :as io] + [clojure.java.shell :refer [sh]] ) (:gen-class)) (defn read-csv-domains @@ -31,12 +32,12 @@ domains-csv (vec (read-csv-domains file-csv)) ;; Filters leaving those that have not been checked domains-unchecked (filter #(= (get % 2) "nil") domains-csv)] - (def mod-domains-csv domains-csv) ;; List with domains with a boolean indicating if it is generate or not in WordPress - (doseq [domain-data domains-unchecked] (let [domain (get domain-data 1) + (doseq [domain-data domains-unchecked] (let [line (get domain-data 0) + domain (get domain-data 1) ;; Check if domain it is generate or not in WordPress check-wordpress (wordpress? domain)] ;; Edit domains-csv with check WordPress - (def mod-domains-csv (map #(-> (if (= domain (get % 1)) (assoc % 2 (str check-wordpress)) %)) mod-domains-csv)) - ;; Save domains to CSV - (save-csv-domains file-csv mod-domains-csv))))) + (prn (str line " " domain " " check-wordpress)) + (prn (sh "sed" "-i" "1s/b/o/g" (str "resources/" file-csv))))))) +;; (prn (sh "sed" "-i" (str "'" line "s/.*/" line "," domain "," check-wordpress "/g'") (str "resources/" file-csv))))))) diff --git a/top-1m-test.csv b/top-1m-test.csv deleted file mode 100644 index 83c7d69..0000000 --- a/top-1m-test.csv +++ /dev/null @@ -1,4 +0,0 @@ -1,google.com -2,youtube.com -4,tmall.com -5,idecrea.es