;;;; Author: Steve Wolfman
;;;;
;;;; This file contains the basis for a classifier constructor.  The
;;;; constructor uses very specific formats for data points and
;;;; features which are described on the project page for HW#3, CSE473
;;;; Winter 1999, UWashington;
;;;; http://www.cs.washington.edu/education/courses/cse473/assign/ps3.html

;; Feature access functions.
;; A feature is a list of the form (name description domain [extractor]).
(defun feature-name (feature)
  "Return the name of FEATURE (its first element)."
  (first feature))

(defun feature-description (feature)
  "Return the description of FEATURE (its second element)."
  (second feature))

(defun feature-domain (feature)
  "Return the domain of FEATURE (its third element)."
  (third feature))

(defun feature-extractor (feature)
  "Return the extractor of FEATURE (its fourth element), or NIL if absent."
  (fourth feature))

;; Checks to see if something has the proper form to be a feature
(defun feature-p (feature)
  "Return true if FEATURE has the proper form to be a feature.

A feature is a list of three or four elements: a keyword name, a
description string, a domain that is a list of keywords, and an
optional extractor that must be NIL or a function object."
  (and (listp feature)
       ;; Compute the length once; it is needed for two separate checks.
       (let ((len (length feature)))
         (and
          ;; Features need not necessarily have an extractor
          (or (= len 3) (= len 4))
          ;; The name of a feature must start with a ':'.
          (keywordp (feature-name feature))
          ;; The descriptor string must be a string.
          (stringp (feature-description feature))
          ;; The domain must be a list of keywords (symbols
          ;; starting with a ':').
          (listp (feature-domain feature))
          (every #'keywordp (feature-domain feature))
          ;; Either the extractor is not there, or it's null,
          ;; or it's a function
          (or (= len 3)
              (null (feature-extractor feature))
              (functionp (feature-extractor feature)))
          t))))

;; Returns x * log2(x); requires 0 <= x <= 1; defines 0 * log2(0) = 0
(defun xlogx (x)
  "Return X * log2(X) for a real X with 0 <= X <= 1, taking 0 * log2(0) = 0.

Signals a continuable assertion failure if X is not a real number in
that range."
  ;; REALP rather than NUMBERP: a complex number cannot be compared with
  ;; <=, so it must be rejected here instead of by a raw type error below.
  (assert (realp x) (x) "Non-number ~S passed to xlogx" x)
  (assert (<= 0 x 1) (x)
          "Number ~S passed to function xlogx not between 0 and 1" x)
  (if (zerop x)
      0
      (* (log x 2) x)))

(defparameter *info-sum-tolerance* 1d-6
  "Largest absolute difference from 1 that INFO accepts for the sum of
its probabilities.  Needed because floating-point probabilities rarely
sum to exactly 1.")

;; Returns the information content (entropy) in the set of
;; probabilities passed to it
(defun info (&rest probs)
  "Return the entropy, in bits, of the probability distribution PROBS.

Each probability must be a real number between 0 and 1, and together
they must sum to 1 within *INFO-SUM-TOLERANCE*.  Signals a continuable
assertion failure otherwise."
  (assert (every #'(lambda (p) (and (realp p) (<= 0 p 1))) probs)
          (probs)
          "Some probability passed to info is not between 0 and 1.")
  ;; Compare against 1 with a tolerance: an exact = test rejects valid
  ;; floating-point distributions (ten 0.1d0 values sum to just below 1).
  (assert (<= (abs (- 1 (reduce #'+ probs))) *info-sum-tolerance*)
          (probs)
          "Probs passed to info do not sum to 1")
  (reduce #'+ probs :key #'(lambda (p) (- (xlogx p)))))

;; Returns the information content (entropy) in the set of data points
;; over the named key feature.
(defun data-info (key-feature data)
  "Return the entropy of DATA with respect to KEY-FEATURE.

DATA is a list of data points, each an association list.  A point is
counted as positive when its KEY-FEATURE entry has a non-NIL value and
as negative otherwise (including when the entry is missing).  Returns 0
for empty DATA."
  (let* ((total (length data))
         (pos (count-if #'(lambda (point) (cdr (assoc key-feature point)))
                        data))
         (neg (- total pos)))
    (if (zerop total)
        0
        (info (/ pos total) (/ neg total)))))

;; Example of using data-info; if I load the data in wait-ex.lsp and
;; run the following command, I get back the entropy, 1.0 (since it's
;; half and half):
;;
;; (data-info :will-wait *data*)

;; Here are some functions to help you use the data that you have.
;; They automatically label data with the features passed in.

;; label-points-with-features takes a data point list and a feature
;; list and inserts feature values into each data point.
(defun label-points-with-features (data-points features
                                   &optional (new-data-points nil))
  "Label every point in DATA-POINTS with the values of FEATURES.

Each labeled point is pushed onto the front of NEW-DATA-POINTS, so the
result lists the labeled points in the reverse of their order in
DATA-POINTS, followed by whatever NEW-DATA-POINTS initially held.
Returns the resulting list."
  ;; Iterate instead of recursing once per point: Common Lisp does not
  ;; guarantee tail-call elimination, so long data sets could exhaust
  ;; the stack.
  (dolist (point data-points new-data-points)
    (push (label-features point features point) new-data-points)))

;; labels just one data point with all features in the feature list
(defun label-features (data-point features new-data-point)
  "Return NEW-DATA-POINT extended with one entry per feature in FEATURES.

For each feature that has an extractor, the extractor is called on
DATA-POINT and a (feature-name . value) pair is added to the front of
the result, so later features appear earlier.  Features with no
extractor are skipped."
  (dolist (feature features new-data-point)
    (let ((extractor (feature-extractor feature)))
      ;; A feature may legitimately have no extractor (a three-element
      ;; feature, or a NIL fourth element); funcalling NIL would signal
      ;; an error, so such features contribute no label.
      (when extractor
        (setf new-data-point
              (acons (feature-name feature)
                     (funcall extractor data-point)
                     new-data-point))))))

;; The rest is up to you; good luck!