Coverage report: /home/ellis/comp/core/lib/dat/csv.lisp
Kind | Covered | All | % |
expression | 0 | 440 | 0.0 |
branch | 0 | 56 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; lib/dat/csv.lisp --- CSV Data Format
3
;; Comma Separated Values (or tabs or whatever)
7
;; This package prioritizes flexibility. If you want speed, convert to
10
;; Still, efficiency is worth pursuing here and there are some obvious gaps to
13
;; - remove sequence functions
14
;; - research optimized access patterns used in other langs/state of art
18
;; ref: https://datatracker.ietf.org/doc/html/rfc4180
23
(defun parse-number-no-error (string &optional default)
26
(parse-number string))))
31
(defparameter *csv-separator* #\,)
32
(defparameter *csv-quote* #\")
33
(defparameter *csv-print-quote-p* t "print \" when the element is a string?")
34
(defparameter *csv-default-external-format* :utf-8)
36
(defun write-csv-line (record &key stream (delimiter *csv-separator*))
37
"Accept a record and print it in one line as a csv record.
39
A record is a sequence of element. A element can be of any type.
40
If record is nil, nothing will be printed.
41
If stream is nil (default case), it will return a string, otherwise it will return nil.
42
For efficiency reason, no intermediate string will be constructed. "
44
(with-output-to-string (s)
45
(let ((*standard-output* s)
46
(record-size (length record)))
47
(loop for e across record
51
(if *csv-print-quote-p*
53
(write-char *csv-quote*)
55
(write-char *csv-quote*))
58
(when (< i (1- record-size))
59
(write-char delimiter)))))))
60
(format stream "~&~a" result)))
62
(defun write-csv-stream (stream table &key (delimiter *csv-separator*))
63
"Accept a stream and a table and output the table as csv form to the stream.
65
A table is a sequence of lines. A line is a sequence of elements.
66
Elements can be any types"
67
(loop for l across table
68
do (write-csv-line l :stream stream :delimiter delimiter))
69
(write-char #\newline stream)
72
(defun write-csv-file (filename table &key (external-format *csv-default-external-format*) (delimiter *csv-separator*))
73
"Accept a filename and a table and output the table as csv form to the file.
75
A table is a sequence of lines. A line is a sequence of elements.
76
Elements can be any types"
77
(with-open-file (f filename :direction :output
78
:if-does-not-exist :create
80
:external-format external-format)
81
(write-csv-stream f table :delimiter delimiter)))
83
(defun write-csv-string (table)
84
(with-output-to-string (str)
85
(write-csv-stream str table)))
87
(defun parse-csv-string (str &key (delimiter *csv-separator*)) ;; refer RFC4180
89
;; (regexp:split-re "," str)
90
(let ((q-count (count *csv-quote* str :test #'char-equal)))
91
(when (oddp q-count) (warn 'simple-warning :format-control "odd number of #\" in a line (~A)"
92
:format-arguments (list q-count)))
94
(cl-ppcre:split delimiter str) ;(cl-ppcre:split *csv-separator* str)
95
(macrolet ((push-f (fld flds) `(push (coerce (reverse ,fld) 'string) ,flds)))
96
(loop with state = :at-first ;; :at-first | :data-nq | :data-q | :q-in-nq | q-in-q
97
with field with fields
98
for chr of-type character across str
99
do (cond ((eq state :at-first)
101
(cond ((char-equal chr *csv-quote*) (setf state :data-q))
102
((char-equal chr delimiter) (push "" fields))
103
(t (setf state :data-nq) (push chr field))))
105
(cond ((char-equal chr *csv-quote*) (setf state :q-in-nq))
106
((char-equal chr delimiter)
107
(push-f field fields)
108
(setf state :at-first))
109
(t (push chr field))))
111
(cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
112
((char-equal chr delimiter)
113
(push-f field fields)
114
(setf state :at-first))
115
(t (setf state :data-nq) (push chr field))))
117
(cond ((char-equal chr *csv-quote*) (setf state :q-in-q))
118
((char-equal chr delimiter) (push-f field fields) (setf state :at-first))
119
(t (push chr field))))
121
(cond ((char-equal chr *csv-quote*) (setf state :data-q))
122
;; this should only be done conditionally - early escapes quotes
123
((char-equal chr delimiter)
124
(push-f field fields)
125
(setf state :at-first))
127
;; (error "illegal value ( ~A ) after quotation" chr)
131
(progn (push-f field fields) (reverse fields)))))))
135
(defun read-csv-line (stream &key type-conv-fns map-fns (delimiter *csv-separator*) (start 0) end)
136
"Read one line from stream and return a csv record.
138
A CSV record is a vector of elements.
140
type-conv-fns should be a list of functions.
141
If type-conv-fns is nil (the default case), then all will be treated
144
map-fns is a list of functions of one argument and output one result.
145
each function in it will be applied to the parsed element.
146
If map-fns is nil, then nothing will be applied.
148
start and end specifies how many elements per record will be included.
149
If start or end is negative, it counts from the end. -1 is the last element.
151
(declare (type (or (simple-array function *) null) type-conv-fns map-fns))
152
(let* ((rline (read-line stream nil nil)))
154
(let* ((line (string-trim '(#\Space #\Newline #\Return) rline))
155
(strs (parse-csv-string line :delimiter delimiter))
156
(strs-size (length strs)))
158
(setf start (+ start strs-size)))
159
(when (and end (< end 0))
160
(setf end (+ end strs-size)))
161
(setf strs (subseq strs start end))
163
(unless (= (length strs) (length type-conv-fns))
164
(error "Number of type specifier (~a) does not match the number of elements (~a)."
165
(length type-conv-fns) (length strs))))
167
(unless (= (length strs) (length map-fns))
168
(error "Number of mapping functions (~a) does not match the number of elements (~a)."
169
(length map-fns) (length strs))))
171
;; strs is not needed so we simply overwrite it
174
(map 'vector #'funcall type-conv-fns result)))
177
(map 'vector #'funcall map-fns result)))
180
(defun read-csv-stream (stream &key (header t) type-spec map-fns (delimiter *csv-separator*) (start 0) end)
181
"Read from stream until eof and return a csv table.
183
A csv table is a vector of csv records.
184
A csv record is a vector of elements.
186
Type spec should be a list of type specifier (symbols).
187
If the type specifier is nil or t, it will be treated as string.
188
If type-spec is nil (the default case), then all will be treated
191
map-fns is a list of functions of one argument and output one result.
192
each function in it will be applied to the parsed element.
193
If any function in the list is nil or t, it equals to #'identity.
194
If map-fns is nil, then nothing will be applied.
196
start and end specifies how many elements per record will be included.
197
If start or end is negative, it counts from the end. -1 is the last element.
201
(macrolet ((make-num-specifier (specifier)
202
`(lambda (s) (let ((s (parse-number-no-error s s)))
203
(if (numberp s) (funcall ,specifier s) s)))))
207
((t nil string) #'identity)
208
(number #'(lambda (s) (parse-number-no-error s s)))
209
(float (make-num-specifier #'float))
210
(single-float (make-num-specifier #'(lambda (s) (coerce s 'single-float))))
211
(double-float (make-num-specifier #'(lambda (s) (coerce s 'double-float))))
212
(integer (make-num-specifier #'round))
213
(pathname #'pathname)
215
(keyword (lambda (s) (intern s :keyword)))))
228
(symbol-function fn))
229
(t (error "~a is not a valid function specifier." fn))))
233
(cons (coerce header 'vector))
234
(boolean (when header
235
(read-csv-line stream :delimiter delimiter))))))
236
(loop for rec = (read-csv-line stream :type-conv-fns type-conv-fns :map-fns map-fns :delimiter delimiter
237
:start start :end end)
239
collect rec into result
242
(coerce result 'vector)
245
(defun read-csv-file (filename &key (header t)
248
(delimiter *csv-separator*)
249
(external-format *csv-default-external-format*)
252
"Read from stream until eof and return a csv table.
254
A csv table is a vector of csv records.
255
A csv record is a vector of elements.
257
Type spec should be a list of type specifier (symbols).
258
If the type specifier is nil or t, it will be treated as string.
259
If type-spec is nil (the default case), then all will be treated
262
map-fns is a list of functions of one argument and output one result.
263
each function in it will be applied to the parsed element.
264
If any function in the list is nil or t, it equals to #'identity.
265
If map-fns is nil, then nothing will be applied.
267
external-format (default is :UTF-8)
269
start and end specifies how many elements per record will be included.
270
If start or end is negative, it counts from the end. -1 is the last element.
272
(with-open-file (f filename :external-format external-format)
273
(read-csv-stream f :type-spec type-spec :map-fns map-fns
275
:start start :end end
279
(defun read-csv-file-and-sort (filename sort-order &key (header t) (order :ascend) type-spec map-fns (delimiter *csv-separator*) (external-format *csv-default-external-format*))
280
(let ((table (read-csv-file filename
285
:external-format external-format)))
286
(loop for i in (reverse sort-order)
288
(stable-sort table (ecase order (:ascend #'string<=) (:descend #'string>=))
289
:key (lambda (rec) (aref rec i))))
290
finally (return table))))
292
(defclass csv-file-data (schema:file-data-source) ())
295
(defmethod scan-data ((self csv-file-data) (projection sequence))
296
(if (null projection)
297
(read-csv-file (path self))
301
(defmethod serialize ((obj vector) (format (eql :csv)) &key stream (delimiter *csv-separator*))
302
(write-csv-stream stream obj :delimiter delimiter))
304
(defmethod serialize ((obj vector) (format (eql :csv))
306
(external-format *csv-default-external-format*)
307
(delimiter *csv-separator*))
308
(write-csv-file path obj :delimiter delimiter :external-format external-format))
310
(defmethod deserialize ((from pathname) (format (eql :csv)) &rest args)
311
(declare (ignore format))
312
(apply 'read-csv-file from args))
314
(defmethod deserialize (from (format (eql :csv)) &rest args)
315
(declare (ignore format))
316
(apply 'read-csv-stream from args))
318
(defmethod deserialize ((from string) (format (eql :csv)) &rest args)
319
(declare (ignore format))
320
(apply 'parse-csv-string from args))