Coverage report: /home/ellis/comp/core/lib/dat/arff.lisp
Kind | Covered | All | % |
expression | 0 | 336 | 0.0 |
branch | 0 | 32 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; lib/dat/arff.lisp --- ARFF file format
3
;; Attribute-Relation File Format
5
;; based on https://github.com/pieterw/cl-arff-parser
7
;; ref: https://waikato.github.io/weka-wiki/formats_and_processing/arff_stable/
11
% 1. Title: Iris Plants Database
14
% (a) Creator: R.A. Fisher
15
% (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
16
% (c) Date: July, 1988
20
@ATTRIBUTE sepallength NUMERIC
21
@ATTRIBUTE sepalwidth NUMERIC
22
@ATTRIBUTE petallength NUMERIC
23
@ATTRIBUTE petalwidth NUMERIC
24
@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
30
5.1,3.5,1.4,0.2,Iris-setosa
31
4.9,3.0,1.4,0.2,Iris-setosa
32
4.7,3.2,1.3,0.2,Iris-setosa
33
4.6,3.1,1.5,0.2,Iris-setosa
34
5.0,3.6,1.4,0.2,Iris-setosa
35
5.4,3.9,1.7,0.4,Iris-setosa
36
4.6,3.4,1.4,0.3,Iris-setosa
37
5.0,3.4,1.5,0.2,Iris-setosa
38
4.4,2.9,1.4,0.2,Iris-setosa
39
4.9,3.1,1.5,0.1,Iris-setosa
44
https://storm.cis.fordham.edu/~gweiss/data-mining/datasets.html
47
(in-package :dat/arff)
50
((arff-path :accessor arff-path
53
:documentation "A string to the path of the arff
54
file. e.g. /home/user/myData/foo.arff")
55
(arff-relation :accessor arff-relation
56
:initarg :arff-reltation
58
:documentation "The string after @relation. This is
59
essentially the name of the arff.")
60
(arff-attributes :accessor arff-attributes
61
:initarg :arff-attributes
64
:documentation "The attributes as specified in the
65
header. Each attribute is a list that looks as
66
follows: (\"attribute-name\" (\"type\")). In case of a
67
nominal attribute it looks like
68
this: (\"attribute-name\" (\"nominal\" . values)). ")
69
(arff-data :accessor arff-data
73
:documentation "All the data. The bulk of the file."))
74
(:documentation "An arff object contains all the data found in a
77
(defmethod print-object ((arff arff) stream)
79
(pprint-logical-block (stream nil)
80
(format stream "<arff ~a:~%attributes: ~{~%~a~^,~}~%data: ~{~%~a~}>"
81
(arff-relation arff) (arff-attributes arff) (arff-data arff)))
82
(format stream "<arff ~a>" (arff-relation arff))))
84
(defun trim-comments-and-spaces (string &optional (comment-marker "%"))
85
(string-trim (list (code-char 9)) ;; tabs
86
(string-trim " " (subseq string 0 (search comment-marker string)))))
88
(defun csv->list (string &optional (separator ","))
89
"Given a string like '1,2,3, 6, foo' will return list ('1' '2' '3'
95
for end = (search separator string :start2 start)
97
do (setf continue nil)
98
(setf end (length string))
99
collect (string-trim " " (subseq string start end))
100
do (setf start (+ end 1))))
102
(defun string-replace (str1 sub1 sub2)
103
"Nondestructively replaces all occurences of sub1 in str1 by sub2"
104
(let ((str1 (string str1))
110
if (string-equal str1 sub1
112
:end1 (min (length str1)
113
(+ index1 (length sub1))))
114
do (setq str2 (concatenate 'string str2 sub2))
115
(incf index1 (length sub1))
117
(setq str2 (concatenate 'string str2
118
(subseq str1 index1 (1+ index1))))
120
unless (< index1 (length str1))
123
(defun search-space-or-tab (line)
124
(or (search " " line)
125
(search (list (code-char 9)) line)))
127
(defun parse-attribute-name (line)
128
"Assumes the beginning of this line is the attribute-name. If spaces
129
are to be included in the name then the entire name must be quoted. As
130
second return value it also returns the rest of the line which should
132
(setf line (string-replace line (string (code-char 9)) " "))
133
(if (and (search "'" line) ;; attribute name contains '
134
(or (not (search "{" line))
135
(< (search "'" line) (search "{" line))))
136
(values (string-replace (subseq line 1 (search "'" line :start2 1)) " " "-")
137
(trim-comments-and-spaces (subseq line (1+ (search "'" line :start2 1)))))
138
(values (subseq line 0 (search-space-or-tab line))
139
(trim-comments-and-spaces (subseq line (search-space-or-tab line))))))
141
(defun parse-datatype (line)
142
"Assumes that the line starts with the datatype.Look at
143
http://www.cs.waikato.ac.nz/~ml/weka/arff.html for information about
144
the datatype. There is no support for the date datatype."
145
(cond ((equal 0 (search "real" line :test #'string-equal))
147
((equal 0 (search "integer" line :test #'string-equal))
149
((equal 0 (search "numeric" line :test #'string-equal))
151
((equal 0 (search "string" line :test #'string-equal))
153
((search "{" line) ;; nominal
156
(string-trim " " (subseq line (1+ (search "{" line)) (search "}" line))))))
158
(error "datatype ~a not real, integer or nominal" line))))
161
(defun parse-@attribute (line)
162
"@attribute <attribute-name> <datatype>. Returns a list containing
163
the attribute-name and then a list containing datatype information as
164
parsed by parse-datatype."
165
(let (attribute data-type)
166
(multiple-value-setq (attribute data-type)
167
(parse-attribute-name (subseq line (1+ (search " " line)))))
168
(list attribute (parse-datatype data-type))))
170
(defun parse-data (line)
174
;; -----------------------------------------
175
;; Main function used to parse an arff file.
176
;; -----------------------------------------
177
(defun parse-arff-stream (stream &optional path)
178
(let ((arff (make-instance 'arff :arff-path path)))
180
with data-mode-p = nil ;; true when parsing data
181
for line = (read-line stream nil)
183
for trimmed-line = (trim-comments-and-spaces line)
184
when (and data-mode-p
185
(not (equalp trimmed-line "")))
186
collect (parse-data line) into data
187
when (not data-mode-p)
188
do (cond ((equalp trimmed-line "")) ;; skip empty and commented lines
189
((search "@relation" (string-downcase trimmed-line))
190
(setf (arff-relation arff)
191
(subseq trimmed-line (1+ (search " " trimmed-line)))))
192
((search "@attribute" (string-downcase trimmed-line))
193
(setf (arff-attributes arff)
194
(append (arff-attributes arff)
195
(list (parse-@attribute trimmed-line)))))
196
((search "@data" (string-downcase trimmed-line))
197
(setf data-mode-p t)))
198
finally (setf (arff-data arff) data))
201
(defun parse-arff-string (string &optional path)
202
(with-input-from-string (s string)
203
(parse-arff-stream s path)))
205
(defun parse-arff (arff-path)
206
"The arff-path should be a string pointing to an arff-file."
207
(parse-arff-stream (open arff-path) arff-path))
210
(defgeneric remove-attribute-by-name (arff name)
211
(:documentation "Removes the feature with the given name from the
212
arff object (not from the actual file). It will remove it both from
213
that @attributes and the @data."))
215
(defmethod remove-attribute-by-name ((arff arff) (name string))
216
(let ((position (position name (arff-attributes arff) :key #'first :test #'string-equal)))
218
(setf (arff-attributes arff)
219
(delete name (arff-attributes arff) :key #'first :test #'string-equal))
220
(setf (arff-data arff)
221
(loop for instance in (arff-data arff)
222
collect (delete-if (nth position instance) instance))))))
224
(defmethod remove-attribute-by-name ((arff arff) name)
225
(remove-attribute-by-name arff (format nil "~a" name)))