;;; watson.el --- query web search engines and aggregate results ;;; ;;; Author: Eric Marsden ;;; Keywords: web, search ;; ;; Copyright (C) 1999-2000 Eric Marsden ;; ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation; either version 2 of ;; the License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public ;; License along with this program; if not, write to the Free ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, ;; MA 02111-1307, USA. ;; ;; Please send suggestions and bug reports to . ;; The latest version of this package should be available from ;; ;; ;;; Commentary: ;;; Overview ========================================================== ;; ;; watson.el is an emacs interface to web search engines such ;; as Altavista. Given a number of keywords to search for, it will ;; send the query to several search engines. The results are then ;; aggregated and displayed in a *watson* buffer. Currently backends ;; exist for the search engines Altavista, Google, Yahoo!, Excite, ;; Snap, ftpsearch, Dejanews and dmoz.org. ;; ;; Entry points: ;; ;; * `M-x watson' which queries for keywords in the minibuffer, ;; dispatches the requests, then pops up the results according to ;; the variable `watson-notify-method' ;; ;; * `M-x watson-referers' which queries for an URL, then queries ;; certain search engines to provide a list of web pages which link ;; to that URL ;; ;; * `M-x watson-form' which provides a full-page form which allows ;; you to customize different aspects of the search: limit the ;; query to a subset of the available backends; select synchronous ;; or asynchronous search. ;; ;; watson.el tries to rank hits intelligently: if an url is returned ;; by more than one search engine, its rank will be increased. Hits in ;; the same site are coalesced, with an increased rank. The ranking ;; also takes into account the order in which hits were presented by ;; the search engines. ;; ;; The *watson* buffer is set up so that URLs are clickable (using the ;; `browse-url' package to dispatch to your favorite browser). `n' and ;; `p' more to the next and previous match respectively, `?' issues a ;; `HEAD' request to the server to obtain information such as the date ;; of last modification of the file. button-3 in XEmacs pops up a ;; contextual menu. ;; ;; The watson module will issue multiple http requests in parallel if ;; the `watson-async' variable is non-nil (which is the default). In ;; this mode of operation it will use an external program such as lynx ;; for downloads (though Emacs/w3 is still required, for encoding ;; parameters). Otherwise requests are issued sequentially, using ;; Emacs/w3. On some braindead platforms without subprocess support ;; this is the only mode of operation which will work. ;; ;; watson.el was inspired by the Sherlock web search program ;; shipped with recent releases of MacOS. ;; ;; Tested with Emacs 20.4/Solaris, Emacs 20.2/NT, XEmacs 20.4/Solaris. ;; Please note that watson.el depends on carnal knowledge of the HTML ;; generated by the different search engines which it queries. This ;; HTML may change occasionally as search engines undergo relifts. In ;; this event Watson will no longer work correctly for that search ;; engine, since it will no longer be able to extract the useful ;; information from the HTML markup. If this occurs, it should signal ;; an error telling you to upgrade to a newer release (which you ;; should be able to obtain from the URL above). ;; ;; References: ;; ;; ;; ;; ;; ;; ;; @InProceedings{dsl97*1, ;; author = "Luca Cardelli and Rowan Davies", ;; title = "Service Combinators for Web Computing", ;; pages = "1--10", ;; ISBN = "1-880446-89-8", ;; booktitle = "Proceedings of the Conference on Domain-Specific ;; Languages ({DSL}-97)", ;; month = oct # "~15--17", ;; publisher = "USENIX Association", ;; address = "Berkeley", ;; year = "1997", ;; } ;; ;; ;; ;; ;; Thanks ============================================================ ;; ;; Thanks to Robert J. Chassell , Boris Goldowsky ;; , Christoph Conrad ;; and Marko Schütz ;; for many excellent suggestions. ;; TODO ============================================================== ;; ;; How to handle complex boolean queries? Could implement our own ;; syntax, like (and "one" (or "two" "three")), which we then convert ;; to each engine's format. But some of the best search engines (like ;; Google) don't support boolean queries. ;; ;; Customization. ;;; Code: (defconst watson-version 0.12 "The version number of watson.el") (require 'cl) (require 'backquote) (eval-when-compile (let (byte-compile-default-warnings) (defun watson-maybe-fbind (args) (while args (or (fboundp (car args)) (fset (car args) 'ignore)) (setq args (cdr args)))) (defun watson-maybe-bind (args) (mapcar (lambda (var) (unless (boundp var) (set var nil))) args)) (watson-maybe-fbind '(event-window popup-mode-menu w3-form-encode-xwfu)) (watson-maybe-bind '(mode-popup-menu)))) ;; Yahoo! now use Google, so don't bother with them. have to fix the ;; Excite backend: use powersearch to get predictable HTML. Snap and ;; dmoz broken currently (defvar watson-backends '(altavista google dejanews ftpsearch) "List of search engines for which a backend is available.") (defvar watson-backends/active watson-backends) (defvar watson-async t "*If non-nil, watson will run in asynchronous mode. In asynchronous mode watson will use the external program specified by `watson-get-command' to download multiple web pages simultaneously. Otherwise watson will use Emacs/W3's download features. This is slower, but will be the only mode of operation which works on certain platforms.") ;; or use `executable-find' (defun watson-executable-exists-p (exe &rest args) "Test whether the executable EXE exists. Try to execute it with CALL-PROCESS with arguments ARGS, returning its return status or nil if not found." (condition-case nil (apply #'call-process exe nil nil nil args) (file-error nil))) ;; Lynx is at . DOS & win32 ports ;; are available from . ;; w3m is a text-mode browser which handles tables better than lynx; see ;; . ;; ;; Note that Lynx 2.8.3 is buggy: it attempts to initialize the ;; terminal even when you ask for a source dump. ;; ;; I haven't been able to figure out how to use DDE under Microsoft ;; Windows to issue a WWW_SaveAs request to either Netscape or ;; Internet Explorer (if you know please help). ;; ;; ;; ;; It would probably also be possible to hack together something using ;; Netscape's remote invocation API: ;; ;; ;; netscape -noraise -remote openUrl(%s) URL ;; netscape -noraise -remote saveAs(/tmp/watson$$) ;; insert-file-contents (defvar watson-get-command ;; please send me any others which might be useful (cond ((watson-executable-exists-p "wget" "--version") "wget --output-document=-") ((watson-executable-exists-p "lynx" "-help") "lynx -source") ((watson-executable-exists-p "w3m" ) "w3m -dump_source") ;; GET is installed by libwww-perl ((watson-executable-exists-p "GET") "GET") (t (setq watson-async nil) "")) "*Command used by watson to download web pages when used asynchronously. Should be a string containing any necessary commandline switches. Examples are \"lynx -source\", or \"GET\".") (defvar watson-head-command "lynx -head -source" "*Command used by watson to retrieve document metainformation. Should be a string containing any necessary commandline switches. Examples are \"lynx -head -source\" or \"HEAD\".") (defvar watson-timeout 15 "*Number of seconds to wait before assuming a request is late. Watson will activate its buffer (according to the value of the variable `watson-notify-method') when all queries have completed or after this number of seconds. If you have a slow Internet connection it may be useful to set this to a higher value than the default, particularly if you are using watson in synchronous mode.") (defvar watson-url-mouse-face 'highlight "*Face to use for URLs when the mouse is over them.") (defvar watson-url-face 'bold "*Face to use for URLs.") (defvar watson-attribution-face 'italic "*Face to use for attribution of source of a hit.") (defvar watson-mode-hook nil "*Hook to run before entering watson-mode.") (defvar watson-extract-hook nil "*Hook to run before a search-engine specific extraction function.") (defvar watson-notify-method (if (boundp 'Man-notify-method) Man-notify-method 'bully) "*How to select the *watson* buffer once it is ready. Possible values are: * 'newframe: create a dedicated new frame (see `watson-frame-parameters') * 'pushy: make watson the current buffer in the current window * 'bully: make watson the current buffer and only window * 'aggressive: make watson the current buffer in the other window * 'friendly: display watson in the other window but don't make current * 'polite: don't display watson, but print message and beep when ready * 'quiet: like `polite', but don't beep * 'meek: make no indication that watson is ready Any other value of `watson-notify-method' is equivalent to `meek'.") (defvar watson-frame-parameters '((menu-bar-lines . 0)) "*Parameters to use when putting *watson* buffer in a new frame.") (defvar watson-frame) (defvar watson-arguments "") (defvar watson-matches '()) (defvar watson-match-count "0") (defvar watson-timer nil) (defvar watson-tick 0) (defvar watson-jobs (make-hash-table)) (defvar watson-form-keywords nil) (defvar watson-mode-map (let ((map (make-sparse-keymap))) (define-key map (kbd "q") #'bury-buffer) (define-key map (kbd "n") #'watson-next) (define-key map (kbd "p") #'watson-prev) (define-key map (kbd "TAB") #'watson-next) (define-key map (kbd "SPC") #'scroll-up) (define-key map (kbd "DEL") #'scroll-down) (define-key map (kbd "<") #'beginning-of-buffer) (define-key map (kbd ">") #'end-of-buffer) (define-key map (kbd "s") #'isearch-forward) (define-key map (kbd "r") #'isearch-backward) (define-key map (kbd "h") #'describe-mode) (define-key map (kbd "?") #'watson-probe-url) (define-key map (kbd "k") #'watson-kill-url) (define-key map (kbd "RET") #'watson-follow-ref) (define-key map [mouse-2] #'watson-mouse2) (define-key map [button3] #'watson-mouse3) (define-key map (kbd "w") #'(lambda () (interactive) (watson-follow-ref #'w3-fetch))) map) "Keymap used in Watson mode.") (defvar watson-contextual-menu '("Watson" ["Follow reference (browse-url)" watson-follow-ref :active t] ["Follow reference (w3)" (watson-follow-ref #'w3-fetch) :active t] ["Probe URL" watson-probe-url :active t] ["Kill URL" watson-kill-url :active t] ["Describe Mode" describe-mode :active t]) "Contextual menu used in Watson mode") ;; mostly from `iso-sgml.el' by Frederic Lepied (defvar watson-entity-table '(("<" . "<") (">" . ">") (" " . " ") (""" . "\"") ("&" . "&") ("Æ\;" . "Æ") ("Á\;" . "Á") ("Â\;" . "Â") ("À\;" . "À") ("Ã\;" . "Ã") ("Ç\;" . "Ç") ("É\;" . "É") ("È\;" . "È") ("Ë\;" . "Ë") ("Í\;" . "Í") ("Î\;" . "Î") ("Ì\;" . "Ì") ("Ï\;" . "Ï") ("Ñ\;" . "Ñ") ("Ó\;" . "Ó") ("Ô\;" . "Ô") ("Ò\;" . "Ò") ("Ø\;" . "Ø") ("Ú\;" . "Ú") ("Ù\;" . "Ù") ("Ý\;" . "Ý") ("á\;" . "á") ("â\;" . "â") ("æ\;" . "æ") ("à\;" . "à") ("å\;" . "å") ("ã\;" . "ã") ("ç\;" . "ç") ("é\;" . "é") ("ê\;" . "ê") ("è\;" . "è") ("ë\;" . "ë") ("í\;" . "í") ("î\;" . "î") ("ì\;" . "ì") ("ï\;" . "ï") ("ñ\;" . "ñ") ("ó\;" . "ó") ("ô\;" . "ô") ("ò\;" . "ò") ("ø\;" . "ø") ("õ\;" . "õ") ("ú\;" . "ú") ("û\;" . "û") ("ù\;" . "ù") ("ý\;" . "ý") ("Ä\;" . "Ä") ("ä\;" . "ä") ("Ö\;" . "Ö") ("ö\;" . "ö") ("Ü\;" . "Ü") ("ü\;" . "ü") ("ß\;" . "ß") ("§\;" . "§") ("¶\;" . "¶") ("©\;" . "©") ("¡\;" . "¡") ("¿\;" . "¿") ("¢\;" . "¢") ("£\;" . "£") ("×\;" . "×") ("±\;" . "±") ("÷\;" . "÷") ("¬\;" . "¬") ("&mu\;" . "µ") ("&Ae\;" . "Ä") ("&ae\;" . "ä") ("&Oe\;" . "Ö") ("&oe\;" . "ö") ("&Ue\;" . "Ü") ("&ue\;" . "ü") ("&sz\;" . "ß")) "Translation table from SGML entity references to ISO 8859-1 characters.") ;; placate the byte-compiler (defvar watson-buffer-url nil) ;; Each element of the list SOURCES is of the form (name . url), where ;; NAME is a string which will be used to in the *watson* display, and ;; URL is where the data came from (defstruct watson-match url short ; a short description, normally the title long ; a longer description, normally context for the match sources ; a list of search engines which provided this match (score 0)) ; a rating we give to this match ;; ======================================================================= ;; ;; A watson backend named must provide two functions: ;; `watson--url', which given a search string should return the ;; URL appropriate for the backend, and `watson--extract' which, ;; when run in a buffer containing the HTML source at that url, should ;; extract the hits and call `watson-add-match' for each hit. It ;; should also provide a integer variable `watson--score' which ;; determines the weighting to give results from that engine. ;; ;; The architecture of watson is a little complex, to support its ;; asynchronous and synchronous operating modes. In asynchronous mode, ;; queries are issued in the background using subprocesses. When a ;; subprocess exits, a sentinel function is called in the process' ;; buffer. The sentinel is responsible for calling the appropriate ;; `-extract' function and deleting the buffer. The extraction ;; function adds its hits to the variable `watson-matches' (their ;; score is updated as they are added) then schedules a call to ;; `watson-display-matches'. This function (which is run from an idle ;; timer if they are available, to avoid locking up emacs) clears the ;; *watson* buffer of its current contents, and redisplays taking into ;; account the new hits. This results in relatively smooth operation ;; despite the large amount of processing going on in the background. ;; ;; ======================================================================== ;;;###autoload (defun watson (keywords &optional backends) "Lookup a word or phrase on various search engines." (interactive (list (read-string "Watson lookup for: " (current-word)))) (watson-init) (setq watson-arguments (format "%s" keywords)) (message "Looking for matches...") (dolist (backend (or backends watson-backends)) (watson-fetch backend keywords))) ;;;###autoload (defun watson-referers (url) "List web pages which link to URL" (interactive (list (read-string "Pages linking to URL: " "http://"))) (watson-init) (setq watson-arguments (concat "Referers to " url)) (message "Looking for referers...") (watson-fetch-referers url)) (defun watson-init () (let ((buf (get-buffer "*watson*"))) ;; buggy MULE interaction with w3 (unless (boundp 'mule-sysdep-version) (setq mule-sysdep-version nil)) (setq watson-frame (selected-frame)) (setq watson-match-count "0") (setq watson-matches '()) ; remove previous matches (clrhash watson-jobs) ; remove any uncompleted async jobs (setq watson-tick 0) (watson-timer-start) (when buf (set-buffer buf) (setq buffer-read-only nil) (erase-buffer)))) ;; thanks to Robert J. Chassell ;;;###autoload (defun watson-version (&optional here) "Show the version number of watson in the minibuffer. If optional argument HERE is non-nil, insert version number at point." (interactive "P") (let ((version-string (format "Watson version %s" watson-version))) (if here (insert version-string) (if (interactive-p) (message "%s" version-string) version-string)))) (defun watson-fetch-referers (url) (watson-fetch 'altavista url :referers) (watson-fetch 'snap url :referers) (watson-fetch 'google url :referers) (watson-fetch 'yahoo url :referers) (watson-fetch 'dmoz url :referers) (watson-fetch 'excite url :referers)) (defun watson-mouse2 (event) (interactive "e") ;; go to where the event occurred (cond ((fboundp 'event-window) ; XEmacs (set-buffer (window-buffer (event-window event))) (and (event-point event) (goto-char (event-point event)))) ((fboundp 'posn-window) ; Emacs (set-buffer (window-buffer (posn-window (event-start event)))) (goto-char (posn-point (event-start event))))) (watson-follow-ref)) (defun watson-mouse3 (event) (interactive "e") (cond ((fboundp 'event-window) ; XEmacs (set-buffer (window-buffer (event-window event))) (and (event-point event) (goto-char (event-point event))) (popup-mode-menu)))) (defun watson-follow-ref (&optional maybe-fetcher) (interactive) (let* ((data (get-text-property (point) 'watson)) (fetcher (or maybe-fetcher #'browse-url))) (cond ((watson-match-p data) (funcall fetcher (watson-match-url data))) ;; it's the source (search engine) url ((stringp data) (funcall fetcher data))))) (defun watson-kill-url () "Add URL of match at point to the kill ring" (interactive) (let* ((data (get-text-property (point) 'watson)) (url (cond ((watson-match-p data) (watson-match-url data)) ((stringp data) data)))) (kill-new url) (message "Copied %s to the kill ring" url))) (defmacro watson-insert-with-text-property (text prop) `(let ((tp-start (point))) (insert ,text) (put-text-property tp-start (point) 'watson ,prop))) ;; FIXME make this user-customizable (format-like control string) (defun watson-display-match (m) (let (first start local-start end) (goto-char (point-max)) (setq first (point)) (setq start (point)) (insert (watson-match-url m)) (setq end (point)) (add-text-properties start end (list 'face watson-url-face 'mouse-face watson-url-mouse-face)) (insert (format " <%s>" (watson-match-score m))) (insert (format "\n (%s)\n" (watson-match-short m))) (setq start (point)) (insert (watson-match-long m)) (setq end (point)) (fill-region start end) (indent-rigidly start end 3) (goto-char (point-max)) (put-text-property first (point) 'watson m) (setq start (point)) (insert " [") (loop for s in (watson-match-sources m) do (watson-insert-with-text-property (car s) (cdr s)) (insert ", ") finally do (delete-char -2)) (insert "]") (put-text-property start (point) 'face watson-attribution-face) (insert "\n\n"))) (defun watson-really-display-matches () (let ((buf (get-buffer-create "*watson*"))) (save-excursion (set-buffer buf) (setq buffer-read-only nil) (erase-buffer) (loop for m in watson-matches do (watson-display-match m)) (goto-char (point-min)) (watson-mode) (set-buffer-modified-p nil)))) (defun watson-display-matches () ;; sort the matches according to their score (setq watson-matches (sort watson-matches #'(lambda (a b) (> (watson-match-score a) (watson-match-score b))))) ;; display them, avoiding locking up emacs if possible. For some ;; reason the XEmacs #'run-with-idle-timer doesn't work. (if (and (fboundp 'run-with-idle-timer) (not (featurep 'xemacs))) (run-with-idle-timer 1 nil #'watson-really-display-matches) (watson-really-display-matches))) ;; add a new match, checking if the same URL is not already present, ;; in which case we increase its score. (defun watson-add-match (new) ;; sanity checks (unless (stringp (watson-match-url new)) (setf (watson-match-url new) "")) (unless (stringp (watson-match-short new)) (setf (watson-match-short new) "")) (unless (stringp (watson-match-long new)) (setf (watson-match-long new) "")) ;; handle DejaNews hits specially: check that we got a valid ;; hit (not a "search help" page) and don't aggregate (let ((sources (watson-match-sources new))) (if (string= "DejaNews" (car (first (watson-match-sources new)))) (push new watson-matches) (loop with new-url = (watson-match-url new) with found = nil for old in watson-matches for old-url = (watson-match-url old) if (string= old-url new-url) do ;; exact match from two different search engines: increase the ;; URL's score (incf (watson-match-score old) 40) (pushnew (first (watson-match-sources new)) (watson-match-sources old) :test #'(lambda (a b) (string= (car a) (car b)))) (setq found t) else if (> (mismatch old-url new-url) 15) do ;; partial match from two different search engines (probably ;; links to the same site) (incf (watson-match-score old) 10) (pushnew (first (watson-match-sources new)) (watson-match-sources old) :test #'(lambda (a b) (string= (car a) (car b)))) (setq found t) finally (unless found (push new watson-matches))) ;; for the modeline display (setq watson-match-count (number-to-string (length watson-matches)))))) (defun watson-mode () "Major mode for browsing results of queries sent to Internet search engines. Attempts to sort matches by interest. Results are displayed in a special *watson* buffer, which is displayed once all search engines have returned their results, or after `watson-timeout' seconds (whichever comes first). The *watson* buffer is selected according to the value of the variable `watson-notify-method'. \\{watson-mode-map}" (interactive) (require 'browse-url) (use-local-map watson-mode-map) (setq major-mode 'watson-mode mode-name "Watson" buffer-read-only t) ;; only useful in XEmacs (setq mode-popup-menu watson-contextual-menu) (setq mode-line-format '("-" mode-line-mule-info mode-line-modified " Watson: " watson-arguments " / " watson-match-count " matches %[(" mode-line-process minor-mode-alist "%n" ")%]--" (line-number-mode "L%l--") (column-number-mode "C%c--") (-3 . "%p") "-%-")) (run-hooks 'watson-mode-hook)) ;; use text properties instead of silly parsing of the text around ;; point. Makes the code more robust, and should work even if the ;; user changes the display format for matches. (defmacro watson-goto-next-property-change () '(let (target) (setq target (next-single-property-change (point) 'watson)) (if target (goto-char target) (message "On last match")))) (defmacro watson-goto-prev-property-change () '(let (target) (setq target (previous-single-property-change (point) 'watson)) (if target (goto-char target) (message "On first match")))) (defun watson-next () "Move to next match in the buffer." (interactive) (watson-goto-next-property-change) (unless (get-text-property (point) 'watson) (watson-goto-next-property-change))) (defun watson-prev () "Move to previous match in the buffer." (interactive) (watson-goto-prev-property-change) (unless (get-text-property (point) 'watson) (watson-goto-prev-property-change))) (defun watson-probe-url () "Issue a HEAD request to the URL at point and display the headers returned by the web server." (interactive) (let* ((components (split-string watson-head-command)) (watson-head-program (car components)) (watson-head-arguments (cdr components)) (data (get-text-property (point) 'watson)) (url (cond ((watson-match-p data) (watson-match-url data)) ((stringp data) data)))) (message "Probing url %s ..." url) (if watson-async (apply #'watson-add-job #'watson-probe-url-helper watson-head-program (append watson-head-arguments (list url))) ;; else use W3 functions (progn (require 'url) (with-output-to-temp-buffer "*watson-info*" (princ (url-popup-info url)) (shrink-window-if-larger-than-buffer)) (message "Probing url...done"))))) (defun watson-probe-url-helper () (with-output-to-temp-buffer "*watson-info*" (princ (buffer-substring (point-min) (point-max))) (shrink-window-if-larger-than-buffer)) (message "Probing url...done")) (defun watson-fetch (backend keywords &optional referers) (let* ((backend-str (symbol-name backend)) (referer-str (if referers "/referers" "")) (fetcher (intern (concat "watson-" backend-str "-matches"))) (extractor (intern (concat "watson-" backend-str "-extract" referer-str))) (urler (intern (concat "watson-" backend-str "-url" referer-str))) (url (funcall urler keywords)) (components (split-string watson-get-command)) (watson-get-program (car components)) (watson-get-arguments (cdr components))) (if watson-async ;; warning: may be fatal if swallowed (apply #'watson-add-job `(lambda () (make-local-variable 'watson-buffer-url) (setq watson-buffer-url ,url) (run-hooks 'watson-extract-hook) (condition-case why (,extractor) (search-failed (watson-urge-upgrade why))) (watson-display-matches)) watson-get-program (append watson-get-arguments (list url))) ;; synchronous case: use w3 functions (let* ((bufname (concat " *watson-" backend-str "*")) (buf (get-buffer-create bufname))) (save-excursion (set-buffer buf) (erase-buffer) (make-local-variable 'watson-buffer-url) (setq watson-buffer-url url) (require 'url) (setq buffer-offer-save nil) (url-insert-file-contents url) (set-buffer-modified-p nil) (setq buffer-file-name nil) ; avoid "save buffer" questions (run-hooks 'watson-extract-hook) (condition-case why (funcall extractor) (search-failed (watson-urge-upgrade why))) (watson-display-matches)))))) ;; asynchronous process handling (defun watson-add-job (handler job &rest args) (let* ((buf (generate-new-buffer " *watson-async*")) (proc (apply #'start-process "watson-async" buf job args))) (setf (gethash proc watson-jobs) handler) (set-process-sentinel proc #'watson-sentinel))) (defun watson-sentinel (process event) (when (eq (process-status process) 'exit) (let ((buf (process-buffer process)) (handler (gethash process watson-jobs))) (when handler (save-excursion (unwind-protect (progn (set-buffer buf) (funcall handler)) (remhash process watson-jobs) (kill-buffer buf))))))) ;; this is run regularly while a watson search is in progress. If all ;; the requests have been treated, we switch to the *watson* buffer ;; (using user-customizable method from `watson-notify-method'). If ;; `watson-timeout' seconds have passed without all requests having ;; finished, we display the buffer anyway. If no matches have been ;; found at timeout, display a "no matches" message. (defun watson-timer-function (&rest ignore) (incf watson-tick) (cond ((zerop (hash-table-count watson-jobs)) (watson-timer-stop) (watson-notify-when-ready)) ((> watson-tick watson-timeout) (watson-timer-stop) (if (zerop (length watson-matches)) (message "Watson: no matches found after %s seconds" watson-timeout) (watson-notify-when-ready))))) ;; I used to try to work out whether we were using timers or itimers, ;; but the code didn't work on XEmacs 21.x for some reason. Use the ;; compatibility functions instead. (defun watson-timer-start () (setq watson-timer (run-with-timer 1 1 #'watson-timer-function))) (defun watson-timer-stop () (cancel-timer watson-timer) (setq watson-timer nil)) ;; adapted from `Man-notify-method' in man.el. This should really be ;; split to a general notify.el (defun watson-notify-when-ready () "Notify the user when the *watson* buffer is ready. See the variable `watson-notify-method' for the different notification behaviors." (cond ((eq watson-notify-method 'newframe) ;; Since we run asynchronously, perhaps while Emacs is waiting ;; for input, we must not leave a different buffer current. We ;; can't rely on the editor comwatsond loop to reselect the ;; selected window's buffer. (save-excursion (let ((frame (make-frame watson-frame-parameters))) (set-window-buffer (frame-selected-window frame) "*watson*") (set-window-dedicated-p (frame-selected-window frame) t)))) ((eq watson-notify-method 'pushy) (switch-to-buffer "*watson*")) ((eq watson-notify-method 'bully) (and window-system (frame-live-p watson-frame) (select-frame watson-frame)) (pop-to-buffer "*watson*") (delete-other-windows)) ((eq watson-notify-method 'aggressive) (and window-system (frame-live-p watson-frame) (select-frame watson-frame)) (pop-to-buffer "*watson*")) ((eq watson-notify-method 'friendly) (and window-system (frame-live-p watson-frame) (select-frame watson-frame)) (display-buffer "*watson*" 'not-this-window)) ((eq watson-notify-method 'polite) (beep) (message "Watson buffer is ready")) ((eq watson-notify-method 'quiet) (message "Watson buffer is ready")) ((or (eq watson-notify-method 'meek) t) (message "")))) ;; from nnweb.el (defun watson-form-encode (pairs) "Return PAIRS encoded for forms." (require 'w3-forms) (require 'url) ; shouldn't be necessary (mapconcat (function (lambda (data) (concat (w3-form-encode-xwfu (car data)) "=" (w3-form-encode-xwfu (cdr data))))) pairs "&")) (defun watson-decode-entities (str) "Return STR with HTML entities decoded." (let ((buf (get-buffer-create " *watson-decode*"))) (save-excursion (set-buffer buf) (erase-buffer) (insert str) (loop for entity in watson-entity-table do (goto-char (point-min)) (while (search-forward (car entity) nil t) (replace-match (cdr entity) t t))) (buffer-string)))) (defun watson-remove-tags (str) "Return STR with HTML tags removed." (let ((buf (get-buffer-create " *watson-detag*"))) (save-excursion (set-buffer buf) (erase-buffer) (insert str) (while (re-search-forward "<[^>]+>" nil t) (replace-match "" t t)) (buffer-string)))) (defun watson-urge-upgrade (why) (message "failed regexp: %s" why) (remhash (get-buffer-process (current-buffer)) watson-jobs) (kill-buffer (current-buffer)) (loop for b in watson-backends for backend-str = (symbol-name b) for regexps = (symbol-value (intern (concat "watson-" backend-str "-regexps"))) when (find (second why) regexps :test #'string=) do (message (concat "Removing search engine %s" " (please check for a new version of watson)") backend-str) (setq watson-backends (delete b watson-backends)))) ;; (with-output-to-temp-buffer "Watson: upgrade now!" ;; (princ (format " ;; ============= W A T S O N : U P G R A D E N O W ===================== ;; ;; Watson depends on carnal knowledge of the HTML generated by the ;; different search engines which it queries. This HTML may change ;; occasionally as search engines undergo relifts. In this event Watson ;; will no longer work correctly for that search engine, since it will no ;; longer be able to extract the useful information from the HTML markup. ;; ;; It appears that this problem has occurred for the %s engine. This ;; engine has been disabled for future searches in the current emacs ;; session (you can make the change permanent by setting the variable ;; `watson-backends' in your initialization file). An updated version of ;; Watson may be available which resolves this problem. If you wish the ;; package to work correctly, check the URL ;; ;; ;; ;; and replace the current version with the new one (and bytecompile the ;; new file). If there isn't a more recent version which fixes the ;; problem, please email the author at ;; to inform me of the problem." backend-str))))) ;; ======================================================================= ;; widget/form handling code (defun watson-form () (interactive) (require 'widget) (require 'wid-browse) ; XEmacs 20.4 bug (switch-to-buffer "*Watson Data Entry*") (kill-all-local-variables) (let ((inhibit-read-only t)) (erase-buffer)) (widget-insert " /------------ W A T S O N D A T A E N T R Y ----------------\\ This buffer allows you to enter data for a complex Watson search. You provide the keywords, and select which search engines should be queried for this search. Once you have finished entering the data, press the \"Submit query\" button and watson will query the different search engines and display matches they return. Press TAB to jump between buttons, and RET or click mouse-1 to modify the value of a widget. ") (setq watson-form-keywords (widget-create 'string :tag " Keywords" :sample-face 'bold :size 25)) (widget-insert "\n\nUsing search engines:\n\n") (dolist (backend watson-backends) (put backend 'watson-enabled t) (widget-create 'checkbox :format "%[%v%] " :button-prefix " " :notify `(lambda (self &rest ignore) (put ',backend 'watson-enabled (widget-value self))) :help-echo "Should we use this search engine?" t) (widget-insert (format "%s\n" backend))) ;; if an external browser isn't configured (or wasn't ;; auto-detected), don't allow the user to select asynchronous mode (unless (string= watson-get-command "") (widget-insert "\n\nDownload mode:\n\n") (widget-create 'radio-button-choice :entry-format " %b %v" :value (if watson-async "Asynchronous" "Synchronous") :notify (lambda (self &rest ignore) (setq watson-async (string= (widget-value self) "Asynchronous"))) '(item "Asynchronous" :help-echo "async download" :button-prefix " ") '(item "Synchronous" :help-echo "synchronous download"))) ;; submit/reset (widget-insert "\n\n ") (widget-create 'push-button :notify #'watson-form-go :help-echo "Start the search" "Submit query") (widget-insert " ") (widget-create 'push-button :notify (lambda (&rest ignore) (watson-form)) :help-echo "Reset form to default values" "Reset Form") (widget-insert " \\----------------------------------------------------------------/") (use-local-map widget-keymap) (widget-setup) (goto-char (point-min)) (widget-forward 1)) (defun watson-form-go (&rest ignore) (setq watson-backends/active '()) (dolist (backend watson-backends) (when (get backend 'watson-enabled) (push backend watson-backends/active))) (watson (widget-value watson-form-keywords) watson-backends/active)) ;; ========================================================================= (defvar watson-altavista-score 30 "*Weighting to give to matches coming from Altavista. An integer.") (defun watson-altavista-url (keywords) (let* ((pairs `(("pg" . "aq") ("enc" . "iso88591") ("text" . "yes") ("q" . ,keywords))) (data (watson-form-encode pairs))) (concat "http://www.altavista.com/cgi-bin/query?" data))) (defun watson-altavista-url/referers (keywords) (if (string-match "^http://\\(.*\\)$" keywords) (watson-altavista-url (concat "link:" (match-string 1 keywords))) (watson-altavista-url (concat "link:" keywords)))) (defvar watson-altavista-regexps '(">>]
" "^Result Pages: " "$" "...$")) ; ^
(defun watson-altavista-extract () (let (url title context start score) (goto-char (point-min)) (unless (search-forward "

AltaVista found no document matching" nil t) (goto-char (point-min)) (when (re-search-forward (first watson-altavista-regexps)) (delete-region (point-min) (match-end 0))) (goto-char (point-max)) (when (re-search-backward (second watson-altavista-regexps)) (delete-region (point-max) (match-beginning 0))) (goto-char (point-min)) (setq score watson-altavista-score) (while (re-search-forward "^

\n\\([0-9]+\\)\\.\\s-+\n\n" nil t) (setq url (match-string 2)) (setq start (point)) (re-search-forward (third watson-altavista-regexps)) (setq title (buffer-substring start (match-beginning 0))) (setq start (+ (point) 5)) (re-search-forward (fourth watson-altavista-regexps)) (setq context (buffer-substring start (match-beginning 0))) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :score score :sources (list (cons "Altavista" watson-buffer-url)))) (decf score))))) (defalias 'watson-altavista-extract/referers #'watson-altavista-extract) ;; ========================================================================= ;; Google is an excellent engine which rates sites based on the number ;; of links which point to them. Most of the time this is a useful ;; criteria. (defvar watson-google-score 40 "*Weighting to give to matches coming from Google. An integer.") (defun watson-google-url (keywords) (let* ((pairs `(("q" . ,keywords) ("num" . "30"))) (data (watson-form-encode pairs))) (concat "http://www.google.com/search?" data))) (defun watson-google-url/referers (url) (watson-google-url (concat "link:" url))) (defvar watson-google-regexps '("^$" "Result Page:" "
\\s-*..." "
")) (defun watson-google-extract () (let (url title context start score) (goto-char (point-min)) (unless (search-forward "- did not match any documents." nil t) (goto-char (point-min)) (when (re-search-forward (first watson-google-regexps)) (delete-region (point-min) (point))) (goto-char (point-max)) (when (search-backward (second watson-google-regexps)) (delete-region (point-max) (match-beginning 0))) (goto-char (point-min)) (setq score watson-google-score) (while (re-search-forward "

]+\\)>\\(.+\\)" nil t) (setq url (match-string 1)) (setq title (match-string 2)) (while (string-match "<[^>]+>" title) (setq title (replace-match "" t t title))) (setq start (re-search-forward (third watson-google-regexps))) (search-forward (fourth watson-google-regexps)) (setq context (buffer-substring start (match-beginning 0))) (while (string-match "<[^>]+>" context) (setq context (replace-match "" t t context))) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :score score :sources (list (cons "Google" watson-buffer-url)))) (decf score))))) (defalias 'watson-google-extract/referers #'watson-google-extract) ;; ========================================================================== (defvar watson-yahoo-score 20 "*Weighting to give to matches coming from Yahoo! An integer.") (defun watson-yahoo-url (keywords) (let* ((pairs `(("p" . ,keywords))) (data (watson-form-encode pairs))) (concat "http://ink.yahoo.com/bin/query?" data))) (defun watson-yahoo-url/referers (url) (watson-yahoo-url (concat "link:" url))) (defvar watson-yahoo-regexps '("

    \n" "
\n
\n" "
--http://")) (defun watson-yahoo-extract () (let (url title context start score) (goto-char (point-min)) (unless (search-forward "Sorry, no matches were found containing" nil t) (goto-char (point-min)) (when (search-forward (first watson-yahoo-regexps)) (delete-region (point-min) (match-beginning 0))) (goto-char (point-max)) (when (re-search-backward (second watson-yahoo-regexps)) (delete-region (point-max) (match-beginning 0))) (goto-char (point-min)) (setq score watson-yahoo-score) (while (re-search-forward "
  • \\([^<]+\\) - " nil t) (setq url (match-string 1)) (setq title (match-string 2)) (while (string-match "<[^>]+>" title) (setq title (replace-match "" t t title))) (setq start (match-end 0)) (search-forward (third watson-yahoo-regexps)) (setq context (buffer-substring start (match-beginning 0))) (while (string-match "<[^>]+>" context) (setq context (replace-match "" t t context))) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :score score :sources (list (cons "Yahoo!" watson-buffer-url)))) (decf score))))) (defalias 'watson-yahoo-extract/referers #'watson-yahoo-extract) ;; ========================================================================== (defvar watson-excite-score 10 "*Weighting to give to matches coming from Excite. An integer.") (defun watson-excite-url (keywords) (let* ((pairs `(("search" . ,keywords) ("perPage" . "30"))) (data (watson-form-encode pairs))) (concat "http://search.excite.com/search.gw?" data))) (defun watson-excite-url/referers (url) (watson-excite-url (concat "link:" url))) (defvar watson-excite-regexps '("Results]" "
    \\(.*\\)")) (defun watson-excite-extract () (let (url title context score) (goto-char (point-min)) (unless (search-forward "Your search produced no results" nil t) (goto-char (point-min)) (when (search-forward (first watson-excite-regexps)) (delete-region (point-min) (match-beginning 0))) (goto-char (point-max)) (when (search-backward (second watson-excite-regexps)) (delete-region (point-max) (match-beginning 0))) (setq score watson-excite-score) (goto-char (point-min)) (while (re-search-forward "[0-9]+%\\s-+[^>]+>\\([^<]+\\)&nb" nil t) (setq title (match-string 1)) (re-search-forward (third watson-excite-regexps)) (setq context (match-string 1)) (setq url (match-string 2)) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :score score :sources (list (cons "Excite" watson-buffer-url)))) (decf score))))) ;; FIXME apparently not true (defalias 'watson-excite-extract/referers #'watson-excite-extract) ;; =========================================================================== ;; ftpsearch regularly scans anonymous ftp archives and maintains a ;; database of files available at each site. This search will probably ;; only provide useful results if you use a single keyword. (defvar watson-ftpsearch-score -5 "*Weighting to give to matches coming from Ftpsearch. An integer.") (defun watson-ftpsearch-url (keywords) (let* ((pairs `(("form" . "medium") ("query" . ,keywords) ("doit" . "Got Get It!") ("filetype" . "All files") ("hits" . "30"))) (data (watson-form-encode pairs))) (concat "http://ftpsearch.lycos.com/cgi-bin/search?" data))) (defvar watson-ftpsearch-regexps '("\n\n
    "
        "
    \n
    ]+>\\([^<]+\\)\\s-+]+>\\([^<]+\\)$" nil t) (setq host (match-string 1)) (setq path (match-string 2)) (watson-add-match (make-watson-match :url (concat "ftp://" host path) :short "No title" :long "" :score score :sources (list (cons "ftpsearch" watson-buffer-url)))) (decf score))))) ;; ========================================================================== ;; Dmoz, the ex Open Directory now owned by Netscape. (defvar watson-dmoz-score 5 "*Weighting to give to matches coming from Dmoz, the Open Directory. An integer.") (defun watson-dmoz-url (keywords) (let* ((pairs `(("search" . ,keywords))) (data (watson-form-encode pairs))) (concat "http://search.dmoz.org/cgi-bin/search?" data))) (defun watson-dmoz-url/referers (url) (watson-dmoz-url (concat "link:" url))) (defvar watson-dmoz-regexps '("\n

      \n

      ")) (defun watson-dmoz-extract () (let (url title context score) (goto-char (point-min)) (unless (search-forward "
      No \\([^<]+\\) - \\(.*\\)
      " nil t) (setq url (match-string 1)) (setq title (match-string 2)) (setq context (match-string 3)) (while (string-match "<[^>]+>" context) (setq context (replace-match "" t t context))) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :score score :sources (list (cons "dmoz" watson-buffer-url)))) (decf score))))) (defalias 'watson-dmoz-extract/referers #'watson-dmoz-extract) ;; ========================================================================== (defvar watson-snap-score 10 "*Weighting to give to matches coming from Altavista. An integer.") (defun watson-snap-url (keywords) (let* ((pairs `(("keyword" . ,keywords) ("tag" . "st.sn.fdsb"))) (data (watson-form-encode pairs))) (concat "http://nscp.snap.com/search/directory/results/1,61,nscp-0,00.html?" data))) (defun watson-snap-url/referers (keywords) (let* ((pairs `(("KM" . "u") ("KW" . ,keywords))) (fixed "&AM0=m&AT0=w&AK0=&AN=1&NR=20&FR=f&PL=a&DR=0&FM=1&FD=1&FY=98&TM=9&TD=13&TY=99&XT=&DM=&LN=") (data (concat (watson-form-encode pairs) fixed))) (concat "http://www.snap.com/search/power/results/1,180,home-0,00.html?" data))) (defvar watson-snap-regexps '("Helvetica\" size=-1>
      " "

    Found at:.*\\(http[^>]+\\)" "
    " "
    \n\\([^<]+\\)
    " "Found at: \\(http[^<]+\\)")) (defun watson-snap-extract () (let (url title context score) (goto-char (point-min)) (unless (search-forward "Sorry, no results found." nil t) (goto-char (point-min)) (when (search-forward (first watson-snap-regexps)) (delete-region (point-min) (match-end 0))) (goto-char (point-max)) (when (search-backward (second watson-snap-regexps)) (delete-region (point-max) (match-beginning 0))) (goto-char (point-min)) (setq score watson-snap-score) (while (re-search-forward "

    ]+>\\([^<]+\\)
    \\([^<]+\\)
    " nil t) (setq title (match-string 1)) (setq context (match-string 2)) (re-search-forward (third watson-snap-regexps)) (setq url (match-string 1)) (while (string-match "<[^>]+>" title) (setq title (replace-match "" t t title))) (while (string-match "<[^>]+>" context) (setq context (replace-match "" t t context))) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :score score :sources (list (cons "Snap" watson-buffer-url)))) (decf score))))) (defun watson-snap-extract/referers () (let (url title context) (goto-char (point-min)) (unless (search-forward "Sorry, no results found." nil t) (when (re-search-forward (fourth watson-snap-regexps)) (delete-region (point-min) (match-end 0))) (goto-char (point-max)) (when (search-backward (fifth watson-snap-regexps)) (delete-region (point-max) (match-beginning 0))) (goto-char (point-min)) (while (re-search-forward "^ +\\([^<]+\\)" nil t) (setq title (match-string 1)) (re-search-forward (sixth watson-snap-regexps)) (setq context (match-string 1)) (re-search-forward (seventh watson-snap-regexps)) (setq url (match-string 1)) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :sources (list (cons "Snap" watson-buffer-url)))))))) ;; ======================================================================== ;; DejaNews archives USENET articles and big mailing lists. A huge ;; resource of information which is occasionally very useful. (defvar watson-dejanews-score -100 "*Weighting to give to matches coming from DejaNews. An integer.") (defun watson-dejanews-url (keywords) (let* ((pairs `(("ST" . "PS") ("subjects" . "") ("groups" . "") ("authors" . "") ("fromdate" . "") ("todate" . "") ("defaultOp" . "AND") ("DBS" . "complete") ("OP" . "CLASSIC") ("LNG" . "ALL") ("svcclass" . "dnserver") ("showsort" . "date") ("maxhits" . "50") ("format" . "terse") ("QRY" . ,keywords))) (data (watson-form-encode pairs))) (concat "http://www.deja.com/qs.xp?" data))) (defvar watson-dejanews-regexps '(">Author" "" "\\([^<]+\\)[^>]+>\\([^<]+\\)")) (defun watson-dejanews-extract () (let (url title context score) (goto-char (point-min)) (unless (search-forward "Your search did not match any articles" nil t) (goto-char (point-min)) (when (search-forward (first watson-dejanews-regexps)) (delete-region (point-min) (match-end 0))) (goto-char (point-max)) (when (search-backward (second watson-dejanews-regexps)) (delete-region (point-max) (match-beginning 0))) (goto-char (point-min)) (setq score watson-dejanews-score) (while (re-search-forward "^[^<]+\\([^<]+\\)" nil t) (setq url (match-string 1)) (setq title (match-string 2)) (re-search-forward (third watson-dejanews-regexps)) (setq context (concat "Newsgroup: " (match-string 1) "\n" "Author: " (match-string 2))) ;; rewrite the URL to request the original text version of the ;; USENET posting (when (string-match "http://\\(x[0-9]+\\)\\.deja\\.com/=dnc/getdoc\\.xp\\?AN=\\([0-9]+\\)" url) (setq url (concat "http://www.deja.com/article/" (match-string 2 url) "&fmt=raw")) (watson-add-match (make-watson-match :url url :short (watson-decode-entities title) :long (watson-decode-entities context) :score score :sources (list (cons "DejaNews" watson-buffer-url)))) (decf score)))))) (defun watson-test () (interactive) (let* ((watson-backends '(altavista google yahoo dejanews ftpsearch)) (backend-alist (mapcar (lambda (b) (cons (symbol-name b) b)) watson-backends)) (backend-string (completing-read "Backend: " backend-alist nil t)) (backend (cdr (assoc backend-string backend-alist))) (watson-async nil) (debug-on-error t)) (watson-fetch backend "emacs"))) ;; example of using watson functionality programatically, to refine ;; search results ;; (defun watson-refine () ;; (let ((watson-notify-method 'meek) ;; (interesting '())) ;; (watson "LDAP gateway") ;; (dolist (match watson-matches) ;; (when (or (string-match "[Jj]ob" (watson-match-short match)) ;; (string-match "[Jj]ob" (watson-match-long match))) ;; (push match interesting))) ;; (setq watson-matches interesting) ;; (watson-really-display-matches))) (provide 'watson) ;; watson.el ends here