From d132808b1b23a30c50a8cee5d853f2e53e30df3e Mon Sep 17 00:00:00 2001 From: Arun Isaac Date: Sun, 11 Dec 2022 00:43:25 +0000 Subject: Initial commit --- rent.scm | 391 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 rent.scm (limited to 'rent.scm') diff --git a/rent.scm b/rent.scm new file mode 100644 index 0000000..3fe8d94 --- /dev/null +++ b/rent.scm @@ -0,0 +1,391 @@ +;;; rent-in-london --- Shortlist houses to rent in London +;;; Copyright © 2022 Arun Isaac +;;; +;;; This program is free software: you can redistribute it and/or +;;; modify it under the terms of the GNU General Public License as +;;; published by the Free Software Foundation, either version 3 of the +;;; License, or (at your option) any later version. +;;; +;;; This program is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;; General Public License for more details. +;;; +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see +;;; . + +(use-modules (rnrs exceptions) + (rnrs io ports) + (srfi srfi-1) + (srfi srfi-9) + (srfi srfi-26) + (srfi srfi-71) + (srfi srfi-43) + (ice-9 match) + (ice-9 regex) + (ice-9 string-fun) + (sxml match) + (sxml transform) + (web client) + (web response) + (web uri) + (htmlprag) + (json)) + +;; Path to a cache directory. Web pages are cached in this directory +;; so that they don't have to be fetched repeatedly. +(define %cache-directory + "cache") + +;; Coordinates of workplace in #(Latitude Longitude) form. In this +;; instance, these are the coordinates of Darwin Building, University +;; College London. +(define %work-coordinates + #(51.52305 -0.13295)) + +(define %openrent-base-url + "https://www.openrent.co.uk") + +;; Search for all houses within a 30 km radius of Central London. +(define %openrent-index-url + "https://www.openrent.co.uk/properties-to-rent/central-london-greater-london?term=Central%20London,%20Greater%20London&area=30") + +;; Number of seconds before a cache entry is considered stale +(define %openrent-index-page-cache-live-time + 600) + +(define %osrm-base-url + "https://router.project-osrm.org") + +(define %tfl-base-url + "https://api.tfl.gov.uk") + +(define (json-ref scm . keys) + (match keys + ((key other-keys ...) + (apply json-ref + ((if (list? scm) assoc-ref vector-ref) scm key) + other-keys)) + (() scm))) + +(define-record-type + (house property-id coordinates live? rent bedrooms bathrooms + accept-students? student-only? accept-dss? accept-pets? studio? + shared? furnished? unfurnished? video? video-viewing? type + hours-live garden? parking? bills-included? fireplace? available-from + minimum-tenancy distance) + house? + (property-id house-property-id) + (coordinates house-coordinates) + (live? house-live?) + (rent house-rent) + (bedrooms house-bedrooms) + (bathrooms house-bathrooms) + (accept-students? house-accept-students?) + (student-only? house-student-only?) + (accept-dss? house-accept-dss?) + (accept-pets? house-accept-pets?) + (studio? house-studio?) + (shared? house-shared?) + (furnished? house-furnished?) + (unfurnished? house-unfurnished?) + (video? house-video?) + (video-viewing? house-video-viewing?) + (type house-type) + (hours-live house-hours-live) + (garden? house-garden?) + (parking? house-parking?) + (bills-included? house-bills-included?) + (fireplace? house-fireplace?) + (available-from house-available-from) + (minimum-tenancy house-minimum-tenancy) + (distance house-distance)) + +(define (extract-variable html variable-name) + (guard (e (#t (error "Error while extracting" variable-name))) + (let ((expression + (match:substring + (string-match (string-append "var " variable-name " = ([^;]+)") + html) + 1))) + (vector->list + (json-string->scm + ;; Replace single quotes with double quotes. It's not valid + ;; JSON. + (string-map (lambda (c) + (if (char=? c #\') #\" c)) + ;; Remove trailing comma at the end of array or + ;; object expression. It's not valid JSON. + (string-append + (substring expression + 0 + (string-index-right expression #\,)) + (substring expression (1- (string-length expression)))))))))) + +(define (to-boolean n) + (not (zero? n))) + +(define (all-houses) + (let ((html (bytevector->string (http-get* %openrent-index-url + %openrent-index-page-cache-live-time) + (native-transcoder)))) + (map house + (extract-variable html "PROPERTYIDS") + (map vector + (extract-variable html "PROPERTYLISTLATITUDES") + (extract-variable html "PROPERTYLISTLONGITUDES")) + (map to-boolean + (extract-variable html "islivelistBool")) + (extract-variable html "prices") + (extract-variable html "bedrooms") + (extract-variable html "bathrooms") + (map to-boolean + (extract-variable html "students")) + (map to-boolean + (extract-variable html "nonStudents")) + (map to-boolean + (extract-variable html "dss")) + (map to-boolean + (extract-variable html "pets")) + (map to-boolean + (extract-variable html "isstudio")) + (map to-boolean + (extract-variable html "isshared")) + (map to-boolean + (extract-variable html "furnished")) + (map to-boolean + (extract-variable html "unfurnished")) + (map to-boolean + (extract-variable html "hasVideo")) + (map to-boolean + (extract-variable html "videoViewingsAccepted")) + (extract-variable html "propertyTypes") + (extract-variable html "hoursLive") + (map to-boolean + (extract-variable html "gardens")) + (map to-boolean + (extract-variable html "parkings")) + (map to-boolean + (extract-variable html "bills")) + (map to-boolean + (extract-variable html "fireplaces")) + (extract-variable html "availableFrom") + (extract-variable html "minimumTenancy") + (map string->number + (extract-variable html "PROPERTYLISTCOMMUTEORDISTANCE"))))) + +(define (http-get-follow uri) + "Like http-get, but follow redirects." + (let ((response body (http-get uri #:decode-body? #f))) + (if (= (response-code response) 301) + ;; If redirect, follow. + (let ((target (assq-ref (response-headers response) + 'location))) + (http-get-follow (build-uri (uri-scheme uri) + #:userinfo (uri-userinfo target) + #:host (uri-host uri) + #:port (uri-port uri) + #:path (uri-path target) + #:query (uri-query target) + #:fragment (uri-fragment target)))) + ;; Else, return. + body))) + +(define (uri->filename uri) + (string-replace-substring (uri->string uri) "/" "|")) + +(define* (http-get* uri #:optional cache-live-time) + "Like http-get, but cache and follow redirects." + (let* ((uri (if (string? uri) + (string->uri uri) + uri)) + (cache-file (string-append %cache-directory "/" (uri->filename uri)))) + ;; If not cached or cache is stale, query and cache. + (when (or (not (file-exists? cache-file)) + (and cache-live-time + (> (current-time) + (+ (stat:mtime (stat cache-file)) + cache-live-time)))) + (call-with-output-file cache-file + (cut put-bytevector <> (http-get-follow uri)))) + ;; Return contents of cache. + (call-with-input-file cache-file + get-bytevector-all))) + +(define (house-page-sxml house) + (call-with-port (open-bytevector-input-port + (http-get* (string-append %openrent-base-url + "/" (number->string (house-property-id house))))) + html->sxml)) + +(define (handle-top root . children) + (any (match-lambda + (('result . result) result) + (_ #f)) + children)) + +(define (handle-default root . children) + (or (find (match-lambda + (('result . result) #t) + (_ #f)) + children) + root)) + +(define (house-title house) + (pre-post-order + (house-page-sxml house) + `((h1 *preorder* . ,(lambda (root . children) + (sxml-match (cons root children) + ((h1 (@ (class "property-title")) ,title) + (cons 'result title))))) + (*TOP* . ,handle-top) + (*default* . ,handle-default)))) + +(define (house-maximum-tenants house) + (pre-post-order + (house-page-sxml house) + `((td *preorder* . ,(lambda (root . children) + (sxml-match (cons root children) + ((td (i (@ (class "fa fa-users"))) " " (strong ,maximum-tenants)) + (cons 'result (string->number maximum-tenants))) + (,_ #f)))) + (*TOP* . ,handle-top) + (*default* . ,handle-default)))) + +(define (blank-string? str) + (and (string? str) + (string-every char-set:whitespace str))) + +(define (house-tube-stations house) + (pre-post-order + (house-page-sxml house) + `((tbody . ,(lambda (root . children) + (cons 'result (filter-map (match-lambda + (('tube-station . station) station) + (_ #f)) + children)))) + (tr *preorder* . ,(lambda (root . children) + (sxml-match (cons root (remove blank-string? children)) + ((tr (td . ,icon-children) + (td . ,station-children) + (td . ,walk-time-children)) + (and (any (lambda (child) + (sxml-match child + ((img (@ (alt "Underground"))) #t) + (,_ #f))) + icon-children) + (cons 'tube-station + (any (lambda (child) + (and (string? child) + (not (blank-string? child)) + (string-trim-both child))) + station-children)))) + (,_ #f)))) + (*TOP* . ,(lambda args + (or (apply handle-top args) + '()))) + (*default* . ,handle-default)))) + +(define (house-cycling-distance house) + (call-with-port (open-bytevector-input-port + (http-get* + (string-append %osrm-base-url "/table/v1/bike/" + (string-join (map (match-lambda + (#(latitude longitude) + (format #f "~a,~a" longitude latitude))) + (list (house-coordinates house) + %work-coordinates)) + ";") + "?annotations=distance&sources=0&destinations=1"))) + (lambda (port) + (/ (json-ref (json->scm port) + "distances" 0 0) + 1000)))) + +(define (all-tube-lines) + (call-with-port (open-bytevector-input-port + (http-get* (string-append %tfl-base-url + "/Line/Mode/tube"))) + (lambda (port) + (map (lambda (line) + (json-ref line "id")) + (vector->list (json->scm port)))))) + +(define (stations-on-line line-id) + (call-with-port (open-bytevector-input-port + (http-get* (string-append %tfl-base-url + "/Line/" line-id "/StopPoints"))) + (lambda (port) + (map (lambda (station) + (let ((station-name (json-ref station "commonName"))) + (if (string-suffix? " Underground Station" station-name) + (substring station-name 0 (- (string-length station-name) + (string-length " Underground Station")))))) + (vector->list (json->scm port)))))) + +(define (lines-at-station station-name) + (filter (lambda (line) + (member station-name (stations-on-line line))) + (all-tube-lines))) + +(define (list-house house) + (format (current-output-port) + "~a (posted ~a hours ago) +~a/~a +£~a pcm +Cycling distance: ~,1f km +" + (house-title house) + (house-hours-live house) + %openrent-base-url + (house-property-id house) + (house-rent house) + (house-cycling-distance house)) + (match (house-tube-stations house) + (() (display "No tube!")) + (tube-stations + (display "Tube: ") + (display (string-join (map (lambda (station) + (string-append station " (" (string-join (lines-at-station station)) ")")) + tube-stations) + ", ")))) + (newline) + (newline)) + +(define (list-houses houses) + (format (current-output-port) "~a houses~%~%" (length houses)) + (for-each list-house houses)) + +;; Find houses for which +;; +;; - bills are not included in the rent +;; - the advertisement has been alive for less than 100 hours +;; - is not shared +;; - there are 1 or fewer bedrooms +;; - the rent is between £1200 and £1400 +;; - is unfurnished +;; - the advertisement is live +;; - at least 2 tenants are allowed +;; - there is at least one tube station in the vicinity +;; - is not a bedsit +;; +;; Sort the listed houses newest advertisments last. +(list-houses + (filter (lambda (house) + (and + (not (house-bills-included? house)) + (< (house-hours-live house) 100) + (not (house-shared? house)) + (<= (house-bedrooms house) 1) + (>= (house-rent house) 1200) + (<= (house-rent house) 1400) + (house-unfurnished? house) + (house-live? house) + (>= (house-maximum-tenants house) 2) + (not (null? (house-tube-stations house))) + (not (string-contains-ci (house-title house) "bedsit")))) + (sort (all-houses) + (lambda (house1 house2) + (> (house-hours-live house1) + (house-hours-live house2)))))) -- cgit v1.2.3