Update project and core
This commit is contained in:
		@@ -1,11 +1,10 @@
 | 
			
		||||
(defproject wordpress-used "0.1.0-SNAPSHOT"
 | 
			
		||||
  :description "FIXME: write description"
 | 
			
		||||
(defproject wordpress-used "1.0.0-SNAPSHOT"
 | 
			
		||||
  :description "Calculates WordPress usage index from a CSV list of domains"
 | 
			
		||||
  :url "http://example.com/FIXME"
 | 
			
		||||
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
 | 
			
		||||
            :url  "https://www.eclipse.org/legal/epl-2.0/"}
 | 
			
		||||
  :dependencies [[org.clojure/clojure "1.10.0"]
 | 
			
		||||
                 [clj-http "3.10.0"]
 | 
			
		||||
                 [org.clojure/data.csv "0.1.4"]
 | 
			
		||||
                 ]
 | 
			
		||||
                 [org.clojure/data.csv "0.1.4"]]
 | 
			
		||||
  :main ^:skip-aot wordpress-used.core
 | 
			
		||||
  :repl-options {:init-ns wordpress-used.core})
 | 
			
		||||
 
 | 
			
		||||
@@ -1,20 +1,4 @@
 | 
			
		||||
1,google.com
 | 
			
		||||
2,youtube.com
 | 
			
		||||
3,baidu.com
 | 
			
		||||
4,tmall.com
 | 
			
		||||
5,qq.com
 | 
			
		||||
6,taobao.com
 | 
			
		||||
7,sohu.com
 | 
			
		||||
8,facebook.com
 | 
			
		||||
9,wikipedia.org
 | 
			
		||||
10,yahoo.com
 | 
			
		||||
11,login.tmall.com
 | 
			
		||||
12,amazon.com
 | 
			
		||||
13,360.cn
 | 
			
		||||
14,jd.com
 | 
			
		||||
15,weibo.com
 | 
			
		||||
16,sina.com.cn
 | 
			
		||||
17,live.com
 | 
			
		||||
18,reddit.com
 | 
			
		||||
19,pages.tmall.com
 | 
			
		||||
20,vk.com
 | 
			
		||||
5,idecrea.es
 | 
			
		||||
		
		
			
  | 
@@ -5,23 +5,45 @@
 | 
			
		||||
   [clojure.java.io :as io]
 | 
			
		||||
   ) (:gen-class))
 | 
			
		||||
 | 
			
		||||
(defn read-csv-domains
  "Load every row of the CSV resource named `url`.

  `url` is resolved with `io/resource`, opened with `io/reader` and parsed
  with `csv/read-csv`. The rows are fully realized with `doall` before the
  reader is closed, so the result can be consumed after this function
  returns."
  [url]
  (with-open [rdr (-> url io/resource io/reader)]
    ;; read-csv is consumed here, while the reader is still open.
    (-> rdr csv/read-csv doall)))
 | 
			
		||||
 | 
			
		||||
(defn save-csv-domains
  "Write the rows in `new-domains` as CSV to the destination `url`.

  The destination is opened with `io/writer` and is closed by `with-open`
  once `csv/write-csv` has finished."
  [url new-domains]
  (with-open [out (io/writer url)]
    (csv/write-csv out new-domains)))
 | 
			
		||||
 | 
			
		||||
(defn wordpress?
  "Check if a web page is generated with WordPress.

  Requests `http://<url>/` and searches the response body for a
  `meta ... generator ... WordPress` tag.

  Returns true when the tag is found, false otherwise. It also returns false
  when there is no textual body to inspect, for example when the host cannot
  be resolved.

  NOTE(review): `:throw-exceptions false` presumably only covers HTTP error
  statuses; network-level failures such as a connect timeout may still
  throw. Confirm against the clj-http documentation."
  [url]
  (let [response (client/get (str "http://" url "/")
                             {:ignore-unknown-host? true
                              :connection-timeout   5000
                              :throw-exceptions     false})
        ;; With :ignore-unknown-host? the whole response is nil for an
        ;; unknown host, so the body can be nil here.
        body     (:body response)]
    ;; Guard the body: re-find throws a NullPointerException on nil input.
    (boolean (and (string? body)
                  (re-find #"meta.*generator.*WordPress" body)))))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
(defn -main
  "Entry point: flag every domain of the CSV with whether it uses WordPress.

  Reads the rows of the `top-1m-test.csv` resource through
  `read-csv-domains`, appends to each row the boolean returned by
  `wordpress?` for the value in the row's second column, and stores the
  extended rows through `save-csv-domains` under the same file name.

  `args` are accepted but not used."
  [& args]
  (let [;; Name of the file containing the CSV with the domains
        file-csv       "top-1m-test.csv"
        ;; Rows read from the CSV (second column holds the domain name)
        domains        (read-csv-domains file-csv)
        ;; Rows with a boolean appended indicating whether the domain is
        ;; generated with WordPress. mapv builds the vector eagerly, so no
        ;; extra doall/vec wrapping is needed.
        domains-checks (mapv #(conj % (wordpress? (get % 1))) domains)]
    ;; Save domains to CSV
    (save-csv-domains file-csv domains-checks)))
 | 
			
		||||
 | 
			
		||||
;; (defn -main
 | 
			
		||||
;;   [& args]
 | 
			
		||||
;;   ;; Read CSV with all domains
 | 
			
		||||
;;   (with-open [reader (io/reader (clojure.java.io/resource "top-1m-test.csv"))]
 | 
			
		||||
;;     (doall
 | 
			
		||||
;;       (let [domains                (csv/read-csv reader)
 | 
			
		||||
;;             ;; Check is WordPress
 | 
			
		||||
;;             domains-with-wordpress (vec (map #(conj % (wordpress? (get % 1))) domains))]
 | 
			
		||||
 | 
			
		||||
;;         ;; Save CSV
 | 
			
		||||
;;         (with-open [writer (io/writer (clojure.java.io/resource "top-1m-test.csv"))]
 | 
			
		||||
;;           (csv/write-csv writer (doall domains-with-wordpress)))
 | 
			
		||||
;;         ))))
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										4
									
								
								top-1m-test.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								top-1m-test.csv
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,4 @@
 | 
			
		||||
1,google.com,false
 | 
			
		||||
2,youtube.com,false
 | 
			
		||||
4,tmall.com,false
 | 
			
		||||
5,idecrea.es,true
 | 
			
		||||
		
		
			
  | 
		Reference in New Issue
	
	Block a user