aboutsummaryrefslogtreecommitdiff
;;; rent-in-london --- Shortlist houses to rent in London
;;; Copyright © 2022, 2023 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This program is free software: you can redistribute it and/or
;;; modify it under the terms of the GNU General Public License as
;;; published by the Free Software Foundation, either version 3 of the
;;; License, or (at your option) any later version.
;;; 
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; General Public License for more details.
;;; 
;;; You should have received a copy of the GNU General Public License
;;; along with this program. If not, see
;;; <https://www.gnu.org/licenses/>.

(use-modules (rnrs exceptions)
             (rnrs io ports)
             (srfi srfi-1)
             (srfi srfi-9)
             (srfi srfi-19)
             (srfi srfi-26)
             (srfi srfi-71)
             (srfi srfi-43)
             (srfi srfi-171)
             (ice-9 match)
             (ice-9 regex)
             (ice-9 string-fun)
             (sxml match)
             (sxml transform)
             (web client)
             (web response)
             (web uri)
             (htmlprag)
             (json))

;; Path to a cache directory. Web pages are cached in this directory
;; so that they don't have to be fetched repeatedly.
(define %cache-directory
  "cache")

;; Coordinates of workplace in #(Latitude Longitude) form. In this
;; instance, these are the coordinates of Darwin Building, University
;; College London.
(define %work-coordinates
  #(51.52305 -0.13295))

;; Another frequent destination, say Wembley stadium
(define %wembley-stadium
  #(51.55537 -0.28636))

(define %openrent-base-url
  "https://www.openrent.co.uk")

;; Search for all houses within a 30 km radius of Central London.
(define %openrent-index-url
  "https://www.openrent.co.uk/properties-to-rent/central-london-greater-london?term=Central%20London,%20Greater%20London&area=30")

;; Number of seconds before a cache entry is considered stale
(define %openrent-index-page-cache-live-time
  600)

(define %osrm-base-url
  "https://router.project-osrm.org")

(define %tfl-base-url
  "https://api.tfl.gov.uk")

(define (call-with-atomic-output-file filename proc)
  "Open @var{filename} for output and pass the open port to
@var{proc}. Writes to @var{filename} are guaranteed to be atomic."
  (let* ((temporary-port (mkstemp "rent-XXXXXX"))
         (temporary-filename (port-filename temporary-port)))
    (dynamic-wind
      (const #t)
      (lambda ()
        (call-with-port temporary-port
          (lambda _
            (proc temporary-port)
            (fsync temporary-port)))
        (rename-file temporary-filename filename))
      (lambda ()
        (false-if-exception (delete-file temporary-filename))))))

(define (json-ref scm . keys)
  "Extract subtree of JSON @var{scm} that is addressed by @var{keys}."
  (match keys
    ((key other-keys ...)
     (apply json-ref
            ((if (list? scm) assoc-ref vector-ref) scm key)
            other-keys))
    (() scm)))

(define-record-type <house>
  (house property-id coordinates live? rent bedrooms bathrooms
         accept-students? student-only? accept-dss? accept-pets? studio?
         shared? furnished? unfurnished? video? video-viewing? type
         hours-live garden? parking? bills-included? fireplace? available-from
         minimum-tenancy distance)
  house?
  (property-id house-property-id)
  (coordinates house-coordinates)
  (live? house-live?)
  (rent house-rent)
  (bedrooms house-bedrooms)
  (bathrooms house-bathrooms)
  (accept-students? house-accept-students?)
  (student-only? house-student-only?)
  (accept-dss? house-accept-dss?)
  (accept-pets? house-accept-pets?)
  (studio? house-studio?)
  (shared? house-shared?)
  (furnished? house-furnished?)
  (unfurnished? house-unfurnished?)
  (video? house-video?)
  (video-viewing? house-video-viewing?)
  (type house-type)
  (hours-live house-hours-live)
  (garden? house-garden?)
  (parking? house-parking?)
  (bills-included? house-bills-included?)
  (fireplace? house-fireplace?)
  (available-from house-available-from)
  (minimum-tenancy house-minimum-tenancy)
  (distance house-distance))

(define (extract-variable html variable-name)
  "Extract javascript variable of name @var{variable-name} from
somewhere in @var{html}, a string."
  (guard (e (#t (error "Error while extracting" variable-name)))
    (let ((expression
           (match:substring
            (string-match (string-append "var " variable-name " = ([^;]+)")
                          html)
            1)))
      (vector->list
       (json-string->scm
        ;; Replace single quotes with double quotes. It's not valid
        ;; JSON.
        (string-map (lambda (c)
                      (if (char=? c #\') #\" c))
                    ;; Remove trailing comma at the end of array or
                    ;; object expression. It's not valid JSON.
                    (string-append
                     (substring expression
                                0
                                (string-index-right expression #\,))
                     (substring expression (1- (string-length expression))))))))))

(define (to-boolean n)
  "Convert integer @var{n} to boolean. An integer is considered
@code{#f} if @var{n} is 0 and @code{#t} otherwise."
  (not (zero? n)))

(define* (date day month #:optional (year (date-year (current-date))))
  "Construct a @code{<date>} object."
  (make-date 0 0 0 0 day month year (date-zone-offset (current-date))))

(define (date+ date days)
  "Add @var{days} number of days to @var{date}."
  (time-utc->date
   (add-duration (date->time-utc date)
                 (make-time time-duration 0 (* days 24 60 60)))))

(define (make-date-comparer time-predicate)
  "Return a predicate that compares two dates by converting them to
times and comparing them using @var{time-predicate}."
  (lambda (date1 date2)
    (time-predicate (date->time-utc date1)
                    (date->time-utc date2))))

(define date<=?
  (make-date-comparer time<=?))

(define date>=?
  (make-date-comparer time>=?))

(define (all-houses)
  "Return a list of all houses on the OpenRent index page."
  (let ((html (bytevector->string (http-get* %openrent-index-url
                                             %openrent-index-page-cache-live-time)
                                  (native-transcoder))))
    (map house
         (extract-variable html "PROPERTYIDS")
         (map vector
              (extract-variable html "PROPERTYLISTLATITUDES")
              (extract-variable html "PROPERTYLISTLONGITUDES"))
         (map to-boolean
              (extract-variable html "islivelistBool"))
         (extract-variable html "prices")
         (extract-variable html "bedrooms")
         (extract-variable html "bathrooms")
         (map to-boolean
              (extract-variable html "students"))
         (map to-boolean
              (extract-variable html "nonStudents"))
         (map to-boolean
              (extract-variable html "dss"))
         (map to-boolean
              (extract-variable html "pets"))
         (map to-boolean
              (extract-variable html "isstudio"))
         (map to-boolean
              (extract-variable html "isshared"))
         (map to-boolean
              (extract-variable html "furnished"))
         (map to-boolean
              (extract-variable html "unfurnished"))
         (map to-boolean
              (extract-variable html "hasVideo"))
         (map to-boolean
              (extract-variable html "videoViewingsAccepted"))
         (extract-variable html "propertyTypes")
         (extract-variable html "hoursLive")
         (map to-boolean
              (extract-variable html "gardens"))
         (map to-boolean
              (extract-variable html "parkings"))
         (map to-boolean
              (extract-variable html "bills"))
         (map to-boolean
              (extract-variable html "fireplaces"))
         (map (lambda (offset)
                (date+ (current-date) offset))
              (extract-variable html "availableFrom"))
         (extract-variable html "minimumTenancy")
         (map string->number
              (extract-variable html "PROPERTYLISTCOMMUTEORDISTANCE")))))

(define (http-get-follow uri)
  "Like http-get, but follow redirects."
  (let ((response body (http-get uri #:decode-body? #f)))
    (if (= (response-code response) 301)
        ;; If redirect, follow.
        (let ((target (assq-ref (response-headers response)
                                'location)))
          (http-get-follow (build-uri (uri-scheme uri)
                                      #:userinfo (uri-userinfo target)
                                      #:host (uri-host uri)
                                      #:port (uri-port uri)
                                      #:path (uri-path target)
                                      #:query (uri-query target)
                                      #:fragment (uri-fragment target))))
        ;; Else, return.
        body)))

(define (uri->filename uri)
  "Convert @var{uri} to a safe filename."
  (string-replace-substring (uri->string uri) "/" "|"))

(define* (http-get* uri #:optional cache-live-time)
  "Like http-get, but cache and follow redirects."
  (let* ((uri (if (string? uri)
                  (string->uri uri)
                  uri))
         (cache-file (string-append %cache-directory "/" (uri->filename uri))))
    ;; If not cached or cache is stale, query and cache.
    (when (or (not (file-exists? cache-file))
              (and cache-live-time
                   (> (time-second (current-time))
                      (+ (stat:mtime (stat cache-file))
                         cache-live-time))))
      (call-with-atomic-output-file cache-file
        (cut put-bytevector <> (http-get-follow uri))))
    ;; Return contents of cache.
    (call-with-input-file cache-file
      get-bytevector-all)))

(define* (json-get uri #:optional cache-live-time)
  "Invoke @code{http-get*} on @var{uri} and process the response as
JSON."
  (call-with-port (open-bytevector-input-port
                   (http-get* uri cache-live-time))
    json->scm))

(define (house-page-sxml house)
  "Return the HTML of the page for @var{house} in SXML form."
  (call-with-port (open-bytevector-input-port
                   (http-get* (string-append %openrent-base-url
                                             "/" (number->string (house-property-id house)))))
    html->sxml))

(define (handle-top root . children)
  (any (match-lambda
         (('result . result) result)
         (_ #f))
       children))

(define (handle-default root . children)
  (or (find (match-lambda
              (('result . result) #t)
              (_ #f))
            children)
      root))

(define (house-title house)
  "Find the title of @var{house}."
  (pre-post-order
   (house-page-sxml house)
   `((h1 *preorder* . ,(lambda (root . children)
                         (sxml-match (cons root children)
                                     ((h1 (@ (class "property-title")) ,title)
                                      (cons 'result title)))))
     (*TOP* . ,handle-top)
     (*default* . ,handle-default))))

(define (house-maximum-tenants house)
  "Find the maximum number of tenants allowed in @var{house}."
  (pre-post-order
   (house-page-sxml house)
   `((td *preorder* . ,(lambda (root . children)
                         (sxml-match (cons root children)
                                     ((td (span (i (@ (class "fa fa-users"))) " Max Tenants:")
                                          " " (strong ,maximum-tenants))
                                      (cons 'result (string->number maximum-tenants)))
                                     (,_ #f))))
     (*TOP* . ,handle-top)
     (*default* . ,handle-default))))

(define (blank-string? str)
  "Return @code{#t} if @var{str} only contains whitespace
characters. Else, return @code{#f}."
  (and (string? str)
       (string-every char-set:whitespace str)))

(define (house-tube-stations house)
  "Return a list of tube stations near @var{house}."
  (pre-post-order
   (house-page-sxml house)
   `((tbody . ,(lambda (root . children)
                 (cons 'result (filter-map (match-lambda
                                             (('tube-station . station) station)
                                             (_ #f))
                                           children))))
     (tr *preorder* . ,(lambda (root . children)
                         (sxml-match (cons root (remove blank-string? children))
                                     ((tr (td . ,icon-children)
                                          (td . ,station-children)
                                          (td . ,walk-time-children))
                                      (and (any (lambda (child)
                                                  (sxml-match child
                                                              ((img (@ (alt "Underground"))) #t)
                                                              (,_ #f)))
                                                icon-children)
                                           (cons 'tube-station
                                                 (any (lambda (child)
                                                        (and (string? child)
                                                             (not (blank-string? child))
                                                             (string-trim-both child)))
                                                      station-children))))
                                     (,_ #f))))
     (*TOP* . ,(lambda args
                 (or (apply handle-top args)
                     '())))
     (*default* . ,handle-default))))

(define (cycling-distances source . destinations)
  "Compute the cycling distance between @var{source}, a point, and
@var{destinations}, a list of points. The returned distance is a list
of actual distances by cycle, and not distance as the crow flies."
  (map (cut / <> 1000)
       (vector->list
        (json-ref
         (json-get
          ;; We use ~f for latitude/longitude since ~a would use the
          ;; exponential float notation for small numbers and the API does
          ;; not like that.
          (format #f "~a/table/v1/bike/~a?annotations=distance&sources=0&destinations=~a"
                  %osrm-base-url
                  (string-join
                   (map (match-lambda
                          (#(latitude longitude)
                           (format #f "~f,~f" longitude latitude)))
                        (cons source destinations))
                   ";")
                  (string-join
                   (map number->string
                        (iota (length destinations) 1))
                   ";")))
         "distances" 0))))

(define (lines-at-station station-name)
  "Return a list of tube lines serving @var{station-name}."
  (cond
   ;; Correct slightly misnamed stations.
   ((string=? station-name "Paddington (Hammersmith & City)")
    (lines-at-station "Paddington"))
   ((member station-name (list "Hammersmith (District & Piccadilly)"
                               "Hammersmith (District and Piccadilly)"))
    (lines-at-station "Hammersmith"))
   (else
    (let* ((modes (list "dlr" "elizabeth-line" "tube"))
           (station-id
            ;; Assume the first search result is the station we are
            ;; looking for.
            (json-ref
             (json-get (string-append %tfl-base-url
                                      "/StopPoint/Search/"
                                      (uri-encode station-name)
                                      "?modes="
                                      (string-join modes ",")))
             "matches" 0 "id")))
      ;; Assume that we actually find tube lines at this station. This
      ;; may not be the case if the supplied station-name is not
      ;; actually a tube station.
      (vector->list
       (json-ref (find (lambda (mode-group)
                         (member (json-ref mode-group "modeName")
                                 modes))
                       (vector->list
                        (json-ref
                         (json-get (string-append %tfl-base-url
                                                  "/StopPoint/" station-id))
                         "lineModeGroups")))
                 "lineIdentifier"))))))

(define (list-house house)
  "Display details of @var{house} on the current output port."
  (apply format
         (current-output-port)
         "~a (posted ~a ago)
~a/~a
£~a pcm
Available from ~a
Cycling distance to work: ~,1f km
Cycling distance to Wembley stadium: ~,1f km
"
         (house-title house)
         (if (< (house-hours-live house) 24)
             (format #f "~a hours"
                     (house-hours-live house))
             (format #f "~a days"
                     (round-quotient (house-hours-live house)
                                     24)))
         %openrent-base-url
         (house-property-id house)
         (house-rent house)
         (date->string (house-available-from house) "~B ~d")
         (cycling-distances (house-coordinates house)
                            %work-coordinates
                            %wembley-stadium))
  (match (house-tube-stations house)
    (() (display "No tube!"))
    (tube-stations
     (display "Tube: ")
     (display (string-join (map (lambda (station)
                                  (string-append station " (" (string-join (lines-at-station station)) ")"))
                                tube-stations)
                           ", "))))
  (newline)
  (newline))

(define (list-houses houses)
  "Display details of @var{houses}, a list, on the current output port."
  (format (current-output-port) "~a houses~%~%" (length houses))
  (for-each list-house houses))

;; Find houses for which
;;
;; - bills are not included in the rent
;; - the advertisement has been alive for less than 100 hours
;; - is not shared
;; - there are 1 or fewer bedrooms
;; - the rent is between £1200 and £1400
;; - is unfurnished
;; - the advertisement is live
;; - at least 2 tenants are allowed
;; - there is at least one tube station in the vicinity
;; - is not a bedsit
;;
;; Sort the listed houses newest advertisments first.
(format (current-output-port)
        "~a houses~%"
        (list-transduce (compose (tfilter (lambda (house)
                                            (and (not (house-bills-included? house))
                                                 (< (house-hours-live house) 100)
                                                 (not (house-shared? house))
                                                 (<= (house-bedrooms house) 1)
                                                 (>= (house-rent house) 1200)
                                                 (<= (house-rent house) 1400)
                                                 (house-unfurnished? house)
                                                 (house-live? house)
                                                 (>= (house-maximum-tenants house) 2)
                                                 (not (null? (house-tube-stations house)))
                                                 (not (string-contains-ci (house-title house) "bedsit")))))
                                 (tlog (lambda (_ house)
                                         (list-house house))))
                        rcount
                        (sort (all-houses)
                              (lambda (house1 house2)
                                (< (house-hours-live house1)
                                   (house-hours-live house2))))))