;;; rent-in-london --- Shortlist houses to rent in London ;;; Copyright © 2022, 2023 Arun Isaac ;;; ;;; This program is free software: you can redistribute it and/or ;;; modify it under the terms of the GNU General Public License as ;;; published by the Free Software Foundation, either version 3 of the ;;; License, or (at your option) any later version. ;;; ;;; This program is distributed in the hope that it will be useful, ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with this program. If not, see ;;; . (use-modules (rnrs exceptions) (rnrs io ports) (srfi srfi-1) (srfi srfi-9) (srfi srfi-26) (srfi srfi-71) (srfi srfi-43) (ice-9 match) (ice-9 regex) (ice-9 string-fun) (sxml match) (sxml transform) (web client) (web response) (web uri) (htmlprag) (json)) ;; Path to a cache directory. Web pages are cached in this directory ;; so that they don't have to be fetched repeatedly. (define %cache-directory "cache") ;; Coordinates of workplace in #(Latitude Longitude) form. In this ;; instance, these are the coordinates of Darwin Building, University ;; College London. (define %work-coordinates #(51.52305 -0.13295)) (define %openrent-base-url "https://www.openrent.co.uk") ;; Search for all houses within a 30 km radius of Central London. (define %openrent-index-url "https://www.openrent.co.uk/properties-to-rent/central-london-greater-london?term=Central%20London,%20Greater%20London&area=30") ;; Number of seconds before a cache entry is considered stale (define %openrent-index-page-cache-live-time 600) (define %osrm-base-url "https://router.project-osrm.org") (define %tfl-base-url "https://api.tfl.gov.uk") (define (call-with-atomic-output-file filename proc) "Open @var{filename} for output and pass the open port to @var{proc}. Writes to @var{filename} are guaranteed to be atomic." (let* ((temporary-port (mkstemp "rent-XXXXXX")) (temporary-filename (port-filename temporary-port))) (dynamic-wind (const #t) (lambda () (call-with-port temporary-port (lambda _ (proc temporary-port) (fsync temporary-port))) (rename-file temporary-filename filename)) (lambda () (false-if-exception (delete-file temporary-filename)))))) (define (json-ref scm . keys) "Extract subtree of JSON @var{scm} that is addressed by @var{keys}." (match keys ((key other-keys ...) (apply json-ref ((if (list? scm) assoc-ref vector-ref) scm key) other-keys)) (() scm))) (define-record-type (house property-id coordinates live? rent bedrooms bathrooms accept-students? student-only? accept-dss? accept-pets? studio? shared? furnished? unfurnished? video? video-viewing? type hours-live garden? parking? bills-included? fireplace? available-from minimum-tenancy distance) house? (property-id house-property-id) (coordinates house-coordinates) (live? house-live?) (rent house-rent) (bedrooms house-bedrooms) (bathrooms house-bathrooms) (accept-students? house-accept-students?) (student-only? house-student-only?) (accept-dss? house-accept-dss?) (accept-pets? house-accept-pets?) (studio? house-studio?) (shared? house-shared?) (furnished? house-furnished?) (unfurnished? house-unfurnished?) (video? house-video?) (video-viewing? house-video-viewing?) (type house-type) (hours-live house-hours-live) (garden? house-garden?) (parking? house-parking?) (bills-included? house-bills-included?) (fireplace? house-fireplace?) (available-from house-available-from) (minimum-tenancy house-minimum-tenancy) (distance house-distance)) (define (extract-variable html variable-name) "Extract javascript variable of name @var{variable-name} from somewhere in @var{html}, a string." (guard (e (#t (error "Error while extracting" variable-name))) (let ((expression (match:substring (string-match (string-append "var " variable-name " = ([^;]+)") html) 1))) (vector->list (json-string->scm ;; Replace single quotes with double quotes. It's not valid ;; JSON. (string-map (lambda (c) (if (char=? c #\') #\" c)) ;; Remove trailing comma at the end of array or ;; object expression. It's not valid JSON. (string-append (substring expression 0 (string-index-right expression #\,)) (substring expression (1- (string-length expression)))))))))) (define (to-boolean n) "Convert integer @var{n} to boolean. An integer is considered @code{#f} if @var{n} is 0 and @code{#t} otherwise." (not (zero? n))) (define (all-houses) "Return a list of all houses on the OpenRent index page." (let ((html (bytevector->string (http-get* %openrent-index-url %openrent-index-page-cache-live-time) (native-transcoder)))) (map house (extract-variable html "PROPERTYIDS") (map vector (extract-variable html "PROPERTYLISTLATITUDES") (extract-variable html "PROPERTYLISTLONGITUDES")) (map to-boolean (extract-variable html "islivelistBool")) (extract-variable html "prices") (extract-variable html "bedrooms") (extract-variable html "bathrooms") (map to-boolean (extract-variable html "students")) (map to-boolean (extract-variable html "nonStudents")) (map to-boolean (extract-variable html "dss")) (map to-boolean (extract-variable html "pets")) (map to-boolean (extract-variable html "isstudio")) (map to-boolean (extract-variable html "isshared")) (map to-boolean (extract-variable html "furnished")) (map to-boolean (extract-variable html "unfurnished")) (map to-boolean (extract-variable html "hasVideo")) (map to-boolean (extract-variable html "videoViewingsAccepted")) (extract-variable html "propertyTypes") (extract-variable html "hoursLive") (map to-boolean (extract-variable html "gardens")) (map to-boolean (extract-variable html "parkings")) (map to-boolean (extract-variable html "bills")) (map to-boolean (extract-variable html "fireplaces")) (extract-variable html "availableFrom") (extract-variable html "minimumTenancy") (map string->number (extract-variable html "PROPERTYLISTCOMMUTEORDISTANCE"))))) (define (http-get-follow uri) "Like http-get, but follow redirects." (let ((response body (http-get uri #:decode-body? #f))) (if (= (response-code response) 301) ;; If redirect, follow. (let ((target (assq-ref (response-headers response) 'location))) (http-get-follow (build-uri (uri-scheme uri) #:userinfo (uri-userinfo target) #:host (uri-host uri) #:port (uri-port uri) #:path (uri-path target) #:query (uri-query target) #:fragment (uri-fragment target)))) ;; Else, return. body))) (define (uri->filename uri) "Convert @var{uri} to a safe filename." (string-replace-substring (uri->string uri) "/" "|")) (define* (http-get* uri #:optional cache-live-time) "Like http-get, but cache and follow redirects." (let* ((uri (if (string? uri) (string->uri uri) uri)) (cache-file (string-append %cache-directory "/" (uri->filename uri)))) ;; If not cached or cache is stale, query and cache. (when (or (not (file-exists? cache-file)) (and cache-live-time (> (current-time) (+ (stat:mtime (stat cache-file)) cache-live-time)))) (call-with-atomic-output-file cache-file (cut put-bytevector <> (http-get-follow uri)))) ;; Return contents of cache. (call-with-input-file cache-file get-bytevector-all))) (define (house-page-sxml house) "Return the HTML of the page for @var{house} in SXML form." (call-with-port (open-bytevector-input-port (http-get* (string-append %openrent-base-url "/" (number->string (house-property-id house))))) html->sxml)) (define (handle-top root . children) (any (match-lambda (('result . result) result) (_ #f)) children)) (define (handle-default root . children) (or (find (match-lambda (('result . result) #t) (_ #f)) children) root)) (define (house-title house) "Find the title of @var{house}." (pre-post-order (house-page-sxml house) `((h1 *preorder* . ,(lambda (root . children) (sxml-match (cons root children) ((h1 (@ (class "property-title")) ,title) (cons 'result title))))) (*TOP* . ,handle-top) (*default* . ,handle-default)))) (define (house-maximum-tenants house) "Find the maximum number of tenants allowed in @var{house}." (pre-post-order (house-page-sxml house) `((td *preorder* . ,(lambda (root . children) (sxml-match (cons root children) ((td (span (i (@ (class "fa fa-users"))) " Max Tenants:") " " (strong ,maximum-tenants)) (cons 'result (string->number maximum-tenants))) (,_ #f)))) (*TOP* . ,handle-top) (*default* . ,handle-default)))) (define (blank-string? str) "Return @code{#t} if @var{str} only contains whitespace characters. Else, return @code{#f}." (and (string? str) (string-every char-set:whitespace str))) (define (house-tube-stations house) "Return a list of tube stations near @var{house}." (pre-post-order (house-page-sxml house) `((tbody . ,(lambda (root . children) (cons 'result (filter-map (match-lambda (('tube-station . station) station) (_ #f)) children)))) (tr *preorder* . ,(lambda (root . children) (sxml-match (cons root (remove blank-string? children)) ((tr (td . ,icon-children) (td . ,station-children) (td . ,walk-time-children)) (and (any (lambda (child) (sxml-match child ((img (@ (alt "Underground"))) #t) (,_ #f))) icon-children) (cons 'tube-station (any (lambda (child) (and (string? child) (not (blank-string? child)) (string-trim-both child))) station-children)))) (,_ #f)))) (*TOP* . ,(lambda args (or (apply handle-top args) '()))) (*default* . ,handle-default)))) (define (house-cycling-distance house) "Compute the cycling distance between @var{house} and work. The returned distance is the actual distance by cycle, and not distance as the crow flies." (call-with-port (open-bytevector-input-port (http-get* (string-append %osrm-base-url "/table/v1/bike/" (string-join (map (match-lambda (#(latitude longitude) (format #f "~a,~a" longitude latitude))) (list (house-coordinates house) %work-coordinates)) ";") "?annotations=distance&sources=0&destinations=1"))) (lambda (port) (/ (json-ref (json->scm port) "distances" 0 0) 1000)))) (define (all-tube-lines) "Return a list of all tube lines in London." (call-with-port (open-bytevector-input-port (http-get* (string-append %tfl-base-url "/Line/Mode/tube"))) (lambda (port) (map (lambda (line) (json-ref line "id")) (vector->list (json->scm port)))))) (define (stations-on-line line-id) "Return a list of tube stations on @var{line-id}." (call-with-port (open-bytevector-input-port (http-get* (string-append %tfl-base-url "/Line/" line-id "/StopPoints"))) (lambda (port) (map (lambda (station) (let ((station-name (json-ref station "commonName"))) (if (string-suffix? " Underground Station" station-name) (substring station-name 0 (- (string-length station-name) (string-length " Underground Station")))))) (vector->list (json->scm port)))))) (define (lines-at-station station-name) "Return a list of tube lines serving @var{station-name}." (filter (lambda (line) (member station-name (stations-on-line line))) (all-tube-lines))) (define (list-house house) "Display details of @var{house} on the current output port." (format (current-output-port) "~a (posted ~a hours ago) ~a/~a £~a pcm Cycling distance: ~,1f km " (house-title house) (house-hours-live house) %openrent-base-url (house-property-id house) (house-rent house) (house-cycling-distance house)) (match (house-tube-stations house) (() (display "No tube!")) (tube-stations (display "Tube: ") (display (string-join (map (lambda (station) (string-append station " (" (string-join (lines-at-station station)) ")")) tube-stations) ", ")))) (newline) (newline)) (define (list-houses houses) "Display details of @var{houses}, a list, on the current output port." (format (current-output-port) "~a houses~%~%" (length houses)) (for-each list-house houses)) ;; Find houses for which ;; ;; - bills are not included in the rent ;; - the advertisement has been alive for less than 100 hours ;; - is not shared ;; - there are 1 or fewer bedrooms ;; - the rent is between £1200 and £1400 ;; - is unfurnished ;; - the advertisement is live ;; - at least 2 tenants are allowed ;; - there is at least one tube station in the vicinity ;; - is not a bedsit ;; ;; Sort the listed houses newest advertisments last. (list-houses (filter (lambda (house) (and (not (house-bills-included? house)) (< (house-hours-live house) 100) (not (house-shared? house)) (<= (house-bedrooms house) 1) (>= (house-rent house) 1200) (<= (house-rent house) 1400) (house-unfurnished? house) (house-live? house) (>= (house-maximum-tenants house) 2) (not (null? (house-tube-stations house))) (not (string-contains-ci (house-title house) "bedsit")))) (sort (all-houses) (lambda (house1 house2) (> (house-hours-live house1) (house-hours-live house2))))))