;;; rent-in-london --- Shortlist houses to rent in London ;;; Copyright © 2022, 2023 Arun Isaac <arunisaac@systemreboot.net> ;;; ;;; This program is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU General Public License as ;;; published by the Free Software Foundation, either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see ;;; <https://www.gnu.org/licenses/>. (use-modules (rnrs exceptions) (rnrs io ports) (srfi srfi-1) (srfi srfi-9) (srfi srfi-19) (srfi srfi-26) (srfi srfi-71) (srfi srfi-43) (srfi srfi-171) (ice-9 match) (ice-9 regex) (ice-9 string-fun) (sxml match) (sxml transform) (web client) (web response) (web uri) (htmlprag) (json)) ;; Path to a cache directory. Web pages are cached in this directory ;; so that they don't have to be fetched repeatedly. (define %cache-directory "cache") ;; Coordinates of workplace in #(Latitude Longitude) form. In this ;; instance, these are the coordinates of Darwin Building, University ;; College London. (define %work-coordinates #(51.52305 -0.13295)) ;; Another frequent destination, say Wembley stadium (define %wembley-stadium #(51.55537 -0.28636)) (define %openrent-base-url "https://www.openrent.co.uk") ;; Search for all houses within a 30 km radius of Central London. (define %openrent-index-url "https://www.openrent.co.uk/properties-to-rent/central-london-greater-london?term=Central%20London,%20Greater%20London&area=30") ;; Number of seconds before a cache entry is considered stale (define %openrent-index-page-cache-live-time 600) (define %osrm-base-url "https://router.project-osrm.org") (define %tfl-base-url "https://api.tfl.gov.uk") (define (call-with-atomic-output-file filename proc) "Open @var{filename} for output and pass the open port to @var{proc}. Writes to @var{filename} are guaranteed to be atomic." (let* ((temporary-port (mkstemp "rent-XXXXXX")) (temporary-filename (port-filename temporary-port))) (dynamic-wind (const #t) (lambda () (call-with-port temporary-port (lambda _ (proc temporary-port) (fsync temporary-port))) (rename-file temporary-filename filename)) (lambda () (false-if-exception (delete-file temporary-filename)))))) (define (json-ref scm . keys) "Extract subtree of JSON @var{scm} that is addressed by @var{keys}." (match keys ((key other-keys ...) (apply json-ref ((if (list? scm) assoc-ref vector-ref) scm key) other-keys)) (() scm))) (define-record-type <house> (house property-id coordinates live? rent bedrooms bathrooms accept-students? student-only? accept-dss? accept-pets? studio? shared? furnished? unfurnished? video? video-viewing? type hours-live garden? parking? bills-included? fireplace? available-from minimum-tenancy distance) house? (property-id house-property-id) (coordinates house-coordinates) (live? house-live?) (rent house-rent) (bedrooms house-bedrooms) (bathrooms house-bathrooms) (accept-students? house-accept-students?) (student-only? house-student-only?) (accept-dss? house-accept-dss?) (accept-pets? house-accept-pets?) (studio? house-studio?) (shared? house-shared?) (furnished? house-furnished?) (unfurnished? house-unfurnished?) (video? house-video?) (video-viewing? house-video-viewing?) (type house-type) (hours-live house-hours-live) (garden? house-garden?) (parking? house-parking?) (bills-included? house-bills-included?) (fireplace? house-fireplace?) (available-from house-available-from) (minimum-tenancy house-minimum-tenancy) (distance house-distance)) (define (extract-variable html variable-name) "Extract javascript variable of name @var{variable-name} from somewhere in @var{html}, a string." (guard (e (#t (error "Error while extracting" variable-name))) (let ((expression (match:substring (string-match (string-append "var " variable-name " = ([^;]+)") html) 1))) (vector->list (json-string->scm ;; Replace single quotes with double quotes. It's not valid ;; JSON. (string-map (lambda (c) (if (char=? c #\') #\" c)) ;; Remove trailing comma at the end of array or ;; object expression. It's not valid JSON. (string-append (substring expression 0 (string-index-right expression #\,)) (substring expression (1- (string-length expression)))))))))) (define (to-boolean n) "Convert integer @var{n} to boolean. An integer is considered @code{#f} if @var{n} is 0 and @code{#t} otherwise." (not (zero? n))) (define* (date day month #:optional (year (date-year (current-date)))) "Construct a @code{<date>} object." (make-date 0 0 0 0 day month year (date-zone-offset (current-date)))) (define (date+ date days) "Add @var{days} number of days to @var{date}." (time-utc->date (add-duration (date->time-utc date) (make-time time-duration 0 (* days 24 60 60))))) (define (make-date-comparer time-predicate) "Return a predicate that compares two dates by converting them to times and comparing them using @var{time-predicate}." (lambda (date1 date2) (time-predicate (date->time-utc date1) (date->time-utc date2)))) (define date<=? (make-date-comparer time<=?)) (define date>=? (make-date-comparer time>=?)) (define (all-houses) "Return a list of all houses on the OpenRent index page." (let ((html (bytevector->string (http-get* %openrent-index-url %openrent-index-page-cache-live-time) (native-transcoder)))) (map house (extract-variable html "PROPERTYIDS") (map vector (extract-variable html "PROPERTYLISTLATITUDES") (extract-variable html "PROPERTYLISTLONGITUDES")) (map to-boolean (extract-variable html "islivelistBool")) (extract-variable html "prices") (extract-variable html "bedrooms") (extract-variable html "bathrooms") (map to-boolean (extract-variable html "students")) (map to-boolean (extract-variable html "nonStudents")) (map to-boolean (extract-variable html "dss")) (map to-boolean (extract-variable html "pets")) (map to-boolean (extract-variable html "isstudio")) (map to-boolean (extract-variable html "isshared")) (map to-boolean (extract-variable html "furnished")) (map to-boolean (extract-variable html "unfurnished")) (map to-boolean (extract-variable html "hasVideo")) (map to-boolean (extract-variable html "videoViewingsAccepted")) (extract-variable html "propertyTypes") (extract-variable html "hoursLive") (map to-boolean (extract-variable html "gardens")) (map to-boolean (extract-variable html "parkings")) (map to-boolean (extract-variable html "bills")) (map to-boolean (extract-variable html "fireplaces")) (map (lambda (offset) (date+ (current-date) offset)) (extract-variable html "availableFrom")) (extract-variable html "minimumTenancy") (map string->number (extract-variable html "PROPERTYLISTCOMMUTEORDISTANCE"))))) (define (http-get-follow uri) "Like http-get, but follow redirects." (let ((response body (http-get uri #:decode-body? #f))) (if (= (response-code response) 301) ;; If redirect, follow. (let ((target (assq-ref (response-headers response) 'location))) (http-get-follow (build-uri (uri-scheme uri) #:userinfo (uri-userinfo target) #:host (uri-host uri) #:port (uri-port uri) #:path (uri-path target) #:query (uri-query target) #:fragment (uri-fragment target)))) ;; Else, return. body))) (define (uri->filename uri) "Convert @var{uri} to a safe filename." (string-replace-substring (uri->string uri) "/" "|")) (define* (http-get* uri #:optional cache-live-time) "Like http-get, but cache and follow redirects." (let* ((uri (if (string? uri) (string->uri uri) uri)) (cache-file (string-append %cache-directory "/" (uri->filename uri)))) ;; If not cached or cache is stale, query and cache. (when (or (not (file-exists? cache-file)) (and cache-live-time (> (time-second (current-time)) (+ (stat:mtime (stat cache-file)) cache-live-time)))) (call-with-atomic-output-file cache-file (cut put-bytevector <> (http-get-follow uri)))) ;; Return contents of cache. (call-with-input-file cache-file get-bytevector-all))) (define* (json-get uri #:optional cache-live-time) "Invoke @code{http-get*} on @var{uri} and process the response as JSON." (call-with-port (open-bytevector-input-port (http-get* uri cache-live-time)) json->scm)) (define (house-page-sxml house) "Return the HTML of the page for @var{house} in SXML form." (call-with-port (open-bytevector-input-port (http-get* (string-append %openrent-base-url "/" (number->string (house-property-id house))))) html->sxml)) (define (handle-top root . children) (any (match-lambda (('result . result) result) (_ #f)) children)) (define (handle-default root . children) (or (find (match-lambda (('result . result) #t) (_ #f)) children) root)) (define (house-title house) "Find the title of @var{house}." (pre-post-order (house-page-sxml house) `((h1 *preorder* . ,(lambda (root . children) (sxml-match (cons root children) ((h1 (@ (class "property-title")) ,title) (cons 'result title))))) (*TOP* . ,handle-top) (*default* . ,handle-default)))) (define (house-maximum-tenants house) "Find the maximum number of tenants allowed in @var{house}." (pre-post-order (house-page-sxml house) `((td *preorder* . ,(lambda (root . children) (sxml-match (cons root children) ((td (span (i (@ (class "fa fa-users"))) " Max Tenants:") " " (strong ,maximum-tenants)) (cons 'result (string->number maximum-tenants))) (,_ #f)))) (*TOP* . ,handle-top) (*default* . ,handle-default)))) (define (blank-string? str) "Return @code{#t} if @var{str} only contains whitespace characters. Else, return @code{#f}." (and (string? str) (string-every char-set:whitespace str))) (define (house-tube-stations house) "Return a list of tube stations near @var{house}." (pre-post-order (house-page-sxml house) `((tbody . ,(lambda (root . children) (cons 'result (filter-map (match-lambda (('tube-station . station) station) (_ #f)) children)))) (tr *preorder* . ,(lambda (root . children) (sxml-match (cons root (remove blank-string? children)) ((tr (td . ,icon-children) (td . ,station-children) (td . ,walk-time-children)) (and (any (lambda (child) (sxml-match child ((img (@ (alt "Underground"))) #t) (,_ #f))) icon-children) (cons 'tube-station (any (lambda (child) (and (string? child) (not (blank-string? child)) (string-trim-both child))) station-children)))) (,_ #f)))) (*TOP* . ,(lambda args (or (apply handle-top args) '()))) (*default* . ,handle-default)))) (define (cycling-distances source . destinations) "Compute the cycling distance between @var{source}, a point, and @var{destinations}, a list of points. The returned distance is a list of actual distances by cycle, and not distance as the crow flies." (map (cut / <> 1000) (vector->list (json-ref (json-get ;; We use ~f for latitude/longitude since ~a would use the ;; exponential float notation for small numbers and the API does ;; not like that. (format #f "~a/table/v1/bike/~a?annotations=distance&sources=0&destinations=~a" %osrm-base-url (string-join (map (match-lambda (#(latitude longitude) (format #f "~f,~f" longitude latitude))) (cons source destinations)) ";") (string-join (map number->string (iota (length destinations) 1)) ";"))) "distances" 0)))) (define (lines-at-station station-name) "Return a list of tube lines serving @var{station-name}." (cond ;; Correct slightly misnamed stations. ((string=? station-name "Paddington (Hammersmith & City)") (lines-at-station "Paddington")) ((member station-name (list "Hammersmith (District & Piccadilly)" "Hammersmith (District and Piccadilly)")) (lines-at-station "Hammersmith")) (else (let* ((modes (list "dlr" "elizabeth-line" "tube")) (station-id ;; Assume the first search result is the station we are ;; looking for. (json-ref (json-get (string-append %tfl-base-url "/StopPoint/Search/" (uri-encode station-name) "?modes=" (string-join modes ","))) "matches" 0 "id"))) ;; Assume that we actually find tube lines at this station. This ;; may not be the case if the supplied station-name is not ;; actually a tube station. (vector->list (json-ref (find (lambda (mode-group) (member (json-ref mode-group "modeName") modes)) (vector->list (json-ref (json-get (string-append %tfl-base-url "/StopPoint/" station-id)) "lineModeGroups"))) "lineIdentifier")))))) (define (list-house house) "Display details of @var{house} on the current output port." (apply format (current-output-port) "~a (posted ~a ago) ~a/~a £~a pcm Available from ~a Cycling distance to work: ~,1f km Cycling distance to Wembley stadium: ~,1f km " (house-title house) (if (< (house-hours-live house) 24) (format #f "~a hours" (house-hours-live house)) (format #f "~a days" (round-quotient (house-hours-live house) 24))) %openrent-base-url (house-property-id house) (house-rent house) (date->string (house-available-from house) "~B ~d") (cycling-distances (house-coordinates house) %work-coordinates %wembley-stadium)) (match (house-tube-stations house) (() (display "No tube!")) (tube-stations (display "Tube: ") (display (string-join (map (lambda (station) (string-append station " (" (string-join (lines-at-station station)) ")")) tube-stations) ", ")))) (newline) (newline)) (define (list-houses houses) "Display details of @var{houses}, a list, on the current output port." (format (current-output-port) "~a houses~%~%" (length houses)) (for-each list-house houses)) ;; Find houses for which ;; ;; - bills are not included in the rent ;; - the advertisement has been alive for less than 100 hours ;; - is not shared ;; - there are 1 or fewer bedrooms ;; - the rent is between £1200 and £1400 ;; - is unfurnished ;; - the advertisement is live ;; - at least 2 tenants are allowed ;; - there is at least one tube station in the vicinity ;; - is not a bedsit ;; ;; Sort the listed houses newest advertisments first. (format (current-output-port) "~a houses~%" (list-transduce (compose (tfilter (lambda (house) (and (not (house-bills-included? house)) (< (house-hours-live house) 100) (not (house-shared? house)) (<= (house-bedrooms house) 1) (>= (house-rent house) 1200) (<= (house-rent house) 1400) (house-unfurnished? house) (house-live? house) (>= (house-maximum-tenants house) 2) (not (null? (house-tube-stations house))) (not (string-contains-ci (house-title house) "bedsit"))))) (tlog (lambda (_ house) (list-house house)))) rcount (sort (all-houses) (lambda (house1 house2) (< (house-hours-live house1) (house-hours-live house2))))))