common lisp

resources :noexport

for loop

  (loop for x in '(1 2 3)
collect x) ;; returns list
(loop for x in '(1 2 3)
do (print x)) ;; prints the numbers
(loop for x in '(a b c)
for y in '(1 2 3)
collect (list x y))
;; ((A 1) (B 2) (C 3))
(loop for x in '(a b c d e)
for y from 1
if (> y 1)
do (format t ", ~A" x)
else
do (format t "~A" x))
;; A, B, C, D, E
;; NIL

functions

  (defun <name> (list of arguments)
"docstring"
(body))

plist

defining a plist

  (setf my-plist '(alt "yes" bar "no"))
retrieve a value by key
  (getf my-plist 'alt)
change the value that corresponds to a key
  (setf (getf my-plist 'alt) 45)
get a list of the attributes
  (loop for (key value) on my-plist by #'cddr
collect key)

unbind variable

  (makunbound 'foo)

undefine funciton

  (fmakunbound 'function)

quicklisp

need to install it manually afaik

  curl -O https://beta.quicklisp.org/quicklisp.lisp
sbcl --load quicklisp.lisp
then in sbcl
  (quicklisp-quickstart:install :path "~/.quicklisp")
(ql:add-to-init-file)
then to install a package in sbcl:
  (ql:quickload "cl-csv")
this will install the package if it doesnt exist, and if it exists it just loads it

classes

example class:

  (defclass my-class ()
((property1 :accessor property1-accessor)
(property2 :accessor property2-accessor)))

the accessor is the name you use to access a property1
we construct an object using make-instance:
  (defvar p1 (make-instance 'my-class))

we can initialize the class with specific values, first we need to use the :initarg keyword on the class properties
  (defclass my-class ()
((property1 :initarg property1-initarg :initform default-value :accessor property1-accessor)
(property2 :accessor property2-accessor)))

then we can pass the values we want to make-instance
:initform is used to pass a default value to the slot incase :initarg argument isnt passed to make-instance
  (defvar p1 (make-instance 'my-class :property1-initarg property1-value))

this is the syntax for accessing variables of a class
  (property1-accessor p1)
we can do inheritance by passing the parent class in the parenthases after the class name:
  (defclass my-class (my-parent-class))
the following syntax is used to define a function for a class, note that the function print-object is the function that gets called when trying to print an object using (print object) so thats what we're defining here
  (defmethod print-object ((obj my-class) stream)
(print-unreadable-object (obj stream :type t)
(format stream "value of property1: ~a" (property1 obj))))

structs

structs are basically simpler classes with default functionsinitializersetc

  (defstruct rgb ()
(r 0) (g 0) (b 0)) ;; initialize slot values to 0
;; e.g. constructor built by default
(setf color (make-rgb :r 2)) ;; => #S(RGB :NIL NIL :R 2 :G 0 :B 0)
(list (rgb-r color) (rgb-b color)) ;; => (2 0)

arrays/matrices

to create an array:

  (make-array 10 :initial-element 10)

10101010101010101010

multidimensional arrays:
  (make-array '(4 3) :initial-element 15)
#2A((15 15 15) (15 15 15) (15 15 15) (15 15 15))

access slot in array
  (aref (make-array '(4 3 1) :initial-element 15) 2 0 0)
15

get the length of the array (last argument is the dimension to check length of):
  (print (array-dimension (make-array '(4 3) :initial-element 15) 0))
(print (array-dimension (make-array '(4 3) :initial-element 15) 1))
3
4

iterate through array:
  (map nil #'print (make-array 3 :initial-element 15))
15
15
15

iterate with index:
  (let ((arr (make-array '(4 3) :initial-element 15)))
(loop for i from 0 below (array-dimension arr 0)
do (print (aref arr i 1))))
15
15
15
15

use displacement arrays to get the first element of every row in a 2d array:
  (loop for i from 0 below (array-dimension arr 0)
collect (let ((row (make-array
(array-dimension arr 1)
:displaced-to arr
:displaced-index-offset (* i (array-dimension arr 1)))))
(aref row 0)))

we can create resizable arrays with the :adjustable keyword, :fill-pointer denotes the next position to be filled in the array and is automatically maintained by the array itself, but has to be initialized
  (defparameter *x* (make-array 0 :adjustable t :fill-pointer 0))
(vector-push-extend 'a *x*)
(vector-push-extend 'b *x*)
*x*

AB

fill array manually:
  (make-array '(3 3) :initial-contents '((1 0 0) (0 1 0) (0 0 1)))

vectors

create vector

  (vector 3 5 1)

we can use length on vectors
  (length (vector 3 5 1)) ==> 3

get nth element:
  (defparameter *x* (vector 1 2 3))
(length *x*) ==> 3
(elt *x* 1) ==> 2

we can have nested vectors
  (let ((a (vector (vector 1 2) (vector 3) 4)))
(print a)
(print (elt a 0)))
#(1 2)
#(#(1 2) #(3) 4)

multithreading

in sbcl sb-thread:make-thread takes a function to call in a newly created thread.

  (sb-thread:make-thread
(lambda ()
(progn
(sleep 0) ;; give other threads a chance to run, then return here
(setf c (+ a b))
(print "ADDITION:")
(print c))))
although notice that this is special to sbcl.
the broken link: https://github.com/lmj/lparallel library works great on multiple CL implementations, more on lparallel at https://lparallel.org/page/2/

macros

simple macro from https://lispcookbook.github.io/cl-cookbook/macros.html:

  (defmacro setq2 (v1 v2 e)
(list 'progn (list 'setq v1 e) (list 'setq v2 e)))
;; (setq2 v1 v2 3) => v1=3,v2=3
this macro is very close to the following function definition:
  (defun setq2-function (v1 v2 e)
(list 'progn (list 'setq v1 e) (list 'setq v2 e)))
consider the following macro when2 which behaves like the builtin when
  (defmacro when2 (condition &rest body)
`(if ,condition (progn ,@body)))
;; (when2 t (print "hey")) => "hey"
;; (when2 nil (print "hey")) => nothing
from http:www.lispworks.comdocumentationHyperSpecBody02_df.htm
if a comma is immediately followed by an at-sign, then the form following the at-sign is evaluated to produce a list of objects. these objects are then “spliced” into place in the template. for example, if x has the value (a b c), then
`(x ,x ,@x foo ,(cadr x) bar ,(cdr x) baz ,@(cdr x))
=> (x (a b c) a b c foo b bar (b c) baz b c)
body is just a list and can be used/modified
  (defmacro test-macro (&rest body)
`(quote ,body))
this is called an anaphoric macro where you wrap the body with a lexical function definition that shadows the global one
  CL-USER> (defun foo () 1)
FOO
CL-USER> (defmacro with-different-foo (&body body)
`(flet ((foo () 2))
,@body))
WITH-DIFFERENT-FOO
CL-USER> (progn
(foo))
1
CL-USER> (with-different-foo
(foo))
2

reader macros

the #+ and #- reader macros are pretty nice for commenting out sexps. they allow ignoring the following sexp, if the given symbol isn't/is found in *FEATURES*. Just pick a symbol not in *FEATURES*, and use it with #+ like this:

  ,#+nil
(defun foo ()
...)
Now, the function definition will be ignored (unless NIL is in *FEATURES*, which is not very likely).

use double-floats by default

  (setf *read-default-float-format* 'double-float)

enforce garbage collection

running

  (gc :full t)
invokes the garbage collector

checking equalities

some equality tests may return results that are implementation-specific, i dont list those here, i only wrote the ones that are supposed to work on any cl implementation
string equality:

  (equal "foo" (copy-seq "foo")) ;; => T
symbol equality (any equality operator works for symbols):
  (eq 'foo 'foo) ;; => T
number equality:
  ;; 'eql' is "type sensitive"
(eql 3 3.0) ;; => NIL
(eql 3 3) ;; => T
;; 'equal' works for floats and integers all the same
(equal 3 3.0) ;; => T
(equal 3 3) ;; => T
;; 'equalp' works for floats and integers all the same
(equalp 3 3.0) ;; => T
(equalp 3 3) ;; => T
object equality (same object in memory):
  (eq 'a 'a) ;; => T
(eq "foo" (copy-seq "foo")) ;; => NIL
(eq 3 3.0) ;; => NIL
sequence equality (and general object equality which compares the internal structure):
uppercase and lowercase letters in strings are considered by equal to be distinct. in contrast, equalp ignores case distinctions in strings. equalp works for arrays but equal doesnt ("works" as in does element-wise comparison, which is the intended equality check here)
  ;; strings
(equal "Foo" (copy-seq "Foo")) ;; => T
(equal "Foo" (copy-seq "foo")) ;; => NIL
(equalp "Foo" (copy-seq "foo")) ;; => NIL
;; vectors/arrays
(equal #(1 2 3) (copy-seq #(1 2 3))) ;; => NIL
(equalp #(1 2 3) (copy-seq #(1 2 3))) ;; => T
;; lists
(equal '(1 2 3) (copy-seq '(1 2 3))) ;; => T
(equalp '(1 2 3) (copy-seq '(1 2 3))) ;; => T

alist

we can append new pairs to an alist using push (there are many other ways):

  (let ((my-alist '((a . b))))
(push (cons 'x 'y) my-alist)
(push '(10 . 20) my-alist))
((10 . 20) (X . Y) (A . B))

multiple value returns

  (defun polar (x y)
(values (sqrt (+ (* x x) (* y y))) (atan y x)))
(multiple-value-bind (r theta) (polar 3.0 4.0)
(vector r theta))

files

https://lispcookbook.github.io/cl-cookbook/files.html

basename

  (pathname-name "test1/test.org") ;; test

file extension

  (pathname-type "test1/test.org") ;; org

parent directory

  (pathname-directory "test1/test.org") ;; (:RELATIVE "test1")
(car (last (pathname-directory "test1/test.org"))) ;; "test1"
(uiop:pathname-parent-directory-pathname "./test1/test.org") ;; #P"./"

check if file exists

  (probe-file "/etc/passwd") ;; #P"/etc/passwd"
(probe-file "foo") ;; NIL
(uiop:probe-file* (uiop:parse-unix-namestring "/home/mahmooz/data/got/[Algorithms and Combinatorics №27] Stasys Jukna (auth.) - Boolean Function Complexity_ Advances and Frontiers (2012, Springer) [10.1007_978-3-642-24508-4] - libgen.li-0.png")) ;; handles [ ] properly while probe-file errors out

expanding tilde

  (uiop:native-namestring "~/.emacs.d/") ;; "/home/mahmooz/.emacs.d/"

read file

  (uiop:read-file-string "file.txt")
(uiop:read-file-lines "file.txt")
(uiop:read-file-string (uiop:parse-unix-namestring "path")) ;; handles [ ] (and probably other pathological patterns in filenames) properly

write to file

  (alexandria:write-string-into-file content "file.txt")
(str:to-file "file.txt" content)
(with-open-file (f "/home/pedro/miscellaneous/misc/tests-output/stack-overflow.lisp"
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(write-sequence "hello" f)
(pprint (list example-1 example-2) str))

join paths

  (merge-pathnames "otherpath" "/home/mahmooz/") ;; #P"/home/mahmooz/otherpath"
(namestring (merge-pathnames "otherpath" "/home/mahmooz/")) ;; "/home/mahmooz/otherpath"

path to string

  (namestring #P"test/test.org") ;; "test/test.org"

get current directory

  (uiop/os:getcwd) ;; #P"/home/mahmooz/work/emacs.d/"

change working directory

  (uiop:chdir "/bin/")
(let ((dir "/path/to/another/directory/")) ;; only set cwd lexically
(uiop:with-current-directory (dir)
(directory-files "./")))

list directory

  (uiop:directory-files "./")
(uiop:subdirectories "./")
(directory #P"*.jpg")

walk directory

walking a directory recursively is possible with cl-fad:

  (let (files)
(cl-fad:walk-directory "/tmp/" (lambda (x) (push (namestring x) files)))
(car files))
/tmp/tmp.wYCifjalP0

MOP

MOP refers to "The Common Lisp Object System MetaObject Protocol".
in the following example, we use an sbcl-specific mop package named sb-mop, other common lisp implementations may have similar functionality in a differently named package.

  ;; base class with "static" property `rule' that is shared by all instances
(defclass other-text-object (text-object)
((rule
:accessor other-text-object-rule
:allocation :class
:documentation "the matching method from `*matching-methods*' used to match against the text object.")))
;; define the `inline-math' subclass with its own default value for `rule'
(defclass inline-math (other-text-object)
((rule
:allocation :class
:initform (list '(:name inline-math
:method pair
:data ("\\(" :string "\\)" :string)
:nestable nil)))))
;; we need to "finalize" the class to be able to use MOP
(sb-mop:finalize-inheritance (find-class 'inline-math))
;; then we can grab the rule from the class itself:
(slot-value (sb-mop:class-prototype (find-class 'inline-math)) 'rule)
to make it implementation-agnostic (see https://sourceforge.net/p/sbcl/mailman/sbcl-help/thread/sqn024gyx9.fsf@cam.ac.uk/) we can do:
  ,#+sbcl (:import-from "SB-MOP" "SLOT-VALUE-USING-CLASS")
,#+cmu (:import-from "MOP" "SLOT-VALUE-USING-CLASS")

common sequence operations

mapping sequence

  (mapcar '1+ (loop for i from 0 to 10 collect i))

1234567891011

filtering sequence

  (format t "~A" (remove-if-not 'identity (list nil 1 2 3 nil)))
(terpri)
(format t "~A" (remove-if 'identity (list nil 1 2 3 nil)))
(NIL NIL)
(1 2 3)

finding in a sequence

  (find-if 'symbolp (list 1 2 3 'test))
TEST

reducing a sequence

  (reduce '+ (list 5 6 10))
21

concatenating sequences

  (format t "~A" (concatenate 'string "test1" "test2"))
(terpri)
(format t "~A" (concatenate 'list (list 1 2 3) (list 4 5)))
(1 2 3 4 5)
test1test2

zipping two sequences

  (pairlis '(a b c) '(1 2 3))
((C . 3) (B . 2) (A . 1))

regex

using cl-ppcre. docs at https://edicl.github.io/cl-ppcre/.

regex simple matching

this returns the indicies bounding the matching string.

  (format t "1. ~A~%" (cl-ppcre:scan "[a-z]{8}" "this worddddd is matched"))
(format t "2. ~A~%" (cl-ppcre:all-matches "[a-z]{8}" "this worddddd is matched"))
(format t "3. ~A~%" (cl-ppcre:all-matches "[a-z]{5}" "more than one match"))
(format t "4. ~A~%" (cl-ppcre:all-matches "[a-z]{5}" "more than one match match match"))
4. (14 19 20 25 26 31)
3. (14 19)
2. (5 13)
1. 5

regex group matching

  (cl-ppcre:register-groups-bind (kw val)
("\\+([a-z]+): (.*)" "#+mykeyword: myvalue")
(cons kw val))
(mykeyword . myvalue)

embedded modifiers

different embedded modifiers are supported, like (?i) to enable case-insensitive matching.

  (cl-ppcre:all-matches "(?i)word" "this is an uppercase WORD that is matched")
(21 25)

communicating with subprocesses

https://stackoverflow.com/questions/15988870/how-to-interact-with-a-process-input-output-in-sbcl-common-lisp

  (defun program-stream (program &optional args)
(let ((process (sb-ext:run-program program args
:input :stream
:output :stream
:wait nil
:search t)))
(when process
(make-two-way-stream (sb-ext:process-output process)
(sb-ext:process-input process)))))
(defvar *stream* (program-stream "cat"))
(format *stream* "foo bar baz~%")
(finish-output *stream*)
(read-line *stream*)
(close *stream*)
;; CL-USER> (defparameter *stream* (program-stream "python" '("-i")))
;; *STREAM*
;; CL-USER> (loop while (read-char-no-hang *stream*)) ; skip startup message
;; NIL
;; CL-USER> (format *stream* "1+2~%")
;; NIL
;; CL-USER> (finish-output *stream*)
;; NIL
;; CL-USER> (read-line *stream*)
;; "3"
;; NIL
;; CL-USER> (close *stream*)
;; T
;; CL-USER> (defparameter *stream* (program-stream "unbuffer"
;; '("-p" "tr" "a-z" "A-Z")))
;; *STREAM*
;; CL-USER> (format *stream* "foo bar baz~%")
;; NIL
;; CL-USER> (finish-output *stream*)
;; NIL
;; CL-USER> (read-line *stream*)
;; "FOO BAR BAZ
;; "
;; NIL
;; CL-USER> (close *stream*)
T

get definition/source of function

  (defun test ()
(+ (- 1 2) 3))
(function-lambda-expression #'test)
(LAMBDA () (BLOCK TEST (+ (- 1 2) 3))), T, TEST

check if symbol refers to a function (if function is bound)

  CLTPT> (fboundp 'identity)
#<FUNCTION IDENTITY>
CLTPT> (fboundp 'identity2)
NIL

when-let

  (defmacro when-let (bindings &body body)
"bind multiple VARS to their FORMS like `let`. if all are non-nil,
execute BODY with those bindings in effect, otherwise return NIL."
(let ((bnds (if (and (consp bindings)
(consp (first bindings))
(symbolp (caar bindings)))
bindings
(list bindings))))
`(let ,bnds
(when (and ,@(mapcar #'car bnds))
,@body))))
(defmacro when-let* (bindings &body body)
"like LET*, but if every FORM in BINDINGS evaluates non-nil,
execute BODY, otherwise return NIL immediately."
(if (null bindings)
`(progn ,@body)
(destructuring-bind ((var form) . rest) bindings
`(let ((,var ,form))
(when ,var
(when-let* ,rest ,@body))))))

garbage

  ;; sketch note /home/mahmooz/brain/pen/2025-02-28-Note-12-53.xopp
;; we need to keep all macros including those that "end" other objects, perhaps
;; not macros that return the value 'end though, as those should be just discarded from the
;; parse tree itself, they should be however stored in the text object himself so that we know
;; where the ending starts and ends
(defun parse2 (str1 &optional (as-tree t))
"parse a string, returns an object tree.
the order of the results grabbed from `find-nested-pairs' matters for this function as
it reverses it."
(let ((macros) ;; includes all macros including opening and closing ones
(final-macro-pairs) ;; once done we would have paired each macro with its ending (if its there)
(my-stack))
(let ((macro-regions (reverse (find-nested-pairs str1 (format nil "~A(" *text-macro-seq*) ")"))))
(loop for macro-region in macro-regions ;; these regions are represented as just conses
do (let* ((macro-begin-idx (car macro-region))
(macro-end (cdr macro-region))
(macro-text (subseq str1
(+ (length *text-macro-seq*) macro-begin-idx)
macro-end))
(result)
(final-text-object))
(handler-case (eval (read-from-string macro-text))
(error (c)
(format t "error while evaluating macro ~A: ~A.~%" macro-text c)
(setf result 'broken))
(:no-error (result1)
;; (format t "evaluated macro ~A: ~A~%" macro-text result1)
(setf result result1)
(if (typep result1 'text-object)
(setf final-text-object result1)
(setf final-text-object (make-instance 'default-text-object)))))
(when (equal result 'broken)
(setf final-text-object (make-instance 'default-text-object)))
(let ((new-text-macro
(make-text-macro
:region (make-region :begin macro-begin-idx :end macro-end)
:result result
:final-text-object final-text-object)))
;; handle scope of macro pair (if we found one)
(when (subtypep (class-of final-text-object) 'cltpt::text-object)
(text-object-init final-text-object str1 new-text-macro nil))
(let ((done))
(loop for prev-macro in (reverse my-stack) for prev-macro-idx from 0 while (not done)
;; check if this macro is the closing of a previous macro, if so,
;; we have a "lexical" scope and we should drop everything in between
do (when (and prev-macro
(not (find prev-macro final-macro-pairs :key 'car))
(text-object-ends-by
(text-macro-final-text-object prev-macro)
(text-macro-result new-text-macro)))
;; this macro is the ending of the previous one, we're done with them
(push (cons prev-macro new-text-macro) final-macro-pairs)
(text-object-init final-text-object
str1
prev-macro
new-text-macro)
(setf (text-macro-final-text-object prev-macro) final-text-object)
(loop for i from 1 to prev-macro-idx
do (let ((popped (text-macro-final-text-object (car my-stack))))
(push popped
(text-object-children
(text-macro-final-text-object prev-macro)))
;; this could be done more generailly and DRYed by having a function in the region struct
(decf (region-begin (text-macro-region (text-object-opening-macro popped)))
(region-end (text-macro-region prev-macro)))
(decf (region-end (text-macro-region (text-object-opening-macro popped)))
(region-end (text-macro-region prev-macro)))
(setf my-stack (cdr my-stack))))
(setf done t)))
(unless done
(push new-text-macro my-stack)))
(push new-text-macro macros)))))
;; at this point we have `final-macro-pairs' and their respective text objects
;; but the objects have no notion of parenthood, we need to handle this here
;; which is probably not the most efficient way to go about it, as we could
;; probably do that while iterating and here we'd doing repetitive work.
;; but the algorithm isnt hard to grasp this way, maybe we'll want an improvement
;; in the future.
(if as-tree
(let ((doc (make-document str1 nil)))
(setf (text-object-children doc) (mapcar (lambda (entry) (text-macro-final-text-object entry)) my-stack))
doc)
(mapcar (lambda (entry) (text-macro-final-text-object entry)) my-stack))))
(defun is-macro-contained-in-pair (macro macro-pair)
(and (> (region-begin (text-macro-region macro))
(region-begin (text-macro-region (car macro-pair))))
(< (region-begin (text-macro-region macro))
(region-begin (text-macro-region (cdr macro-pair))))))
(defun find-nested-pairs (str1 begin end &optional (pos 0))
"returns conses of the form (begin . end).
example usage:
(find-nested-pairs \"hello(mystr()here)\" \"(\" \")\")
((5 . 17) (11 . 12))"
(let ((my-stack)
(my-pos pos)
(all-found))
(loop while (< my-pos (length str1))
do (let ((substr (subseq str1 my-pos)))
(if (uiop:string-prefix-p begin substr)
;; we found an instance of `begin', push it onto the stack
(progn
(push (cons my-pos -1) my-stack)
(setf my-pos (+ my-pos (length begin))))
;; if we didnt detect a `begin', check if we have an instance of `end'
(if (uiop:string-prefix-p end substr)
;; if stack isnt empty, pull one instance, we found a match, if stack
;; is empty, we found an `end' with no `begin', we ignore it.
(when my-stack
(setf (cdr (car my-stack)) (1+ my-pos))
(push (car my-stack) all-found)
(setf my-stack (cdr my-stack))
(setf my-pos (+ my-pos (length end))))
;; neither an instance of `begin' nor `end', continue searching
(setf my-pos (1+ my-pos))))))
all-found))
(defun find-nested-pairs-regex (str1 begin end &optional (pos 0))
"similar to `find-nested-pairs', but accepts regexes.
returns a list of conses of the form ((begin . end) . (begin . end))."
(let ((my-stack)
(my-pos pos)
(all-found))
(loop while (< my-pos (length str1))
do (let* ((substr (subseq str1 my-pos))
(begin-match-result (cl-ppcre:all-matches begin substr))
(end-match-result (cl-ppcre:all-matches end substr))
;; where the matched `begin' starts (if at all)
(begin-match-begin-idx (car begin-match-result))
;; where the matched `begin' ends (if at all)
(begin-match-end-idx (cadr begin-match-result))
;; where the matched `end' starts (if at all)
(end-match-begin-idx (car end-match-result))
(end-match-end-idx (cadr end-match-result)))
(if (equal begin-match-begin-idx 0)
;; we found a (possibly nested) instance of `begin', push it onto the stack
(progn
(let ((begin-match-region (cons my-pos (+ my-pos begin-match-end-idx))))
(push begin-match-region my-stack)
(setf my-pos (+ my-pos begin-match-end-idx))))
;; if we didnt detect a `begin', check if we have an instance of `end'
(if (equal end-match-begin-idx 0)
;; if stack isnt empty, pull one instance, we found a match, if stack
;; is empty, we found an `end' with no `begin', we ignore it.
(when my-stack
(let* ((begin-match-region (car my-stack))
(end-match-region (cons my-pos (+ my-pos end-match-end-idx)))
(pair (cons begin-match-region end-match-region)))
(push pair all-found)
(setf my-stack (cdr my-stack))
(setf my-pos (+ my-pos end-match-end-idx))))
;; neither an instance of `begin' nor `end', continue searching
(setf my-pos (1+ my-pos))))))
all-found))
(defun find-nested-multiple-pairs (str begins ends &optional (pos 0))
"returns a list of quadruples (start-index end-index begin-delim end-delim)
for nested delimiters in STR.
Example usage:
(find-nested-multiple-pairs \"hello(mystr[[hey (i)]]here)\" (list \"(\" \"[[\") (list \")\" \"]]\"))
might return:
((5 27 \"(\" \")\") (11 22 \"[[\" \"]]\") (17 20 \"(\" \")\"))
here, each pair is represented by its starting index, one-past the ending index,
the begin delimiter, and its matching end delimiter."
(let ((pairs '())
(stack '())
;; build an association list mapping each begin delimiter to its corresponding end.
(delim-alist (mapcar #'cons begins ends))
(len (length str))
(i pos))
(loop while (< i len) do
(let ((matched nil))
;; check if current position matches any begin delimiter.
(let ((begin-pair (find-if (lambda (pair)
(uiop:string-prefix-p (car pair)
(subseq str i)))
delim-alist)))
(when begin-pair
;; push a record (start-index, begin-delim, expected-end-delim)
(push (list i (car begin-pair) (cdr begin-pair)) stack)
(incf i (length (car begin-pair)))
(setf matched t)))
;; if no begin delimiter was found, check for any end delimiter.
(unless matched
(let ((end-pair (find-if (lambda (pair)
(uiop:string-prefix-p (cdr pair)
(subseq str i)))
delim-alist)))
(when end-pair
;; only record a pair if the top of the stack expects this ending delimiter.
(when (and stack (string= (third (first stack))
(cdr end-pair)))
(let* ((rec (pop stack))
(start (first rec))
(b (second rec))
(e (third rec)))
(push (list start (+ i (length (cdr end-pair))) b e) pairs)))
(incf i (length (cdr end-pair)))
(setf matched t)))
(unless matched
(incf i)))))
;; return the pairs sorted by the starting index.
(sort pairs #'< :key #'first)))
(defun find-regex (text pattern)
"finds regex matches across lines."
(let ((matches)
(lines (str:split (string #\Newline) text)))
(loop for line in lines
for line-pos = 0 then (+ line-pos (length line) 1)
do (progn
(loop for (start end) on (cl-ppcre:all-matches pattern line) by #'cddr
do (push (list (subseq line start end) (+ line-pos start) (+ line-pos end))
matches))))
(nreverse matches)))
;; optimized but incomplete version of the first
(defun find-multiple-pairs-1 (str rules &optional (pos 0))
"finds nested delimiter pairs in STR based on RULES.
RULES is a list of (begin type1 end type2), where:
- begin and end are either strings or regex patterns.
- type1 and type2 are either :string (for normal matching) or :regex (for regex matching).
returns a list of quadruples (start-index end-index begin-delim end-delim).
example:
CLTPT> (find-multiple-pairs \"hello(mystr[[hey (i)]]here)\"
'((\"(\" :string \")\" :string)
(\"[[\" :string \"]]\" :string)
(\"\\w+\\(\" :regex \")\" :string)))
=> ((0 27 \"\\w+\\(\" \")\") (11 22 \"[[\" \"]]\") (17 20 \"(\" \")\"))
"
(let ((pairs '()) ;; ensure pairs is always properly initialized
(stack '()) ;; Stack to track opening delimiters
(len (length str))
(i pos))
(loop while (< i len) do
(let ((matched nil))
;; check if the current position matches any begin delimiter.
(let ((begin-rule (find-if (lambda (rule)
(destructuring-bind (b type _ _) rule
(cond
((eq type :string)
(and (<= (+ i (length b)) len) ;; Avoid out-of-bounds errors
(string= b (subseq str i (+ i (length b))))))
((eq type :regex)
(multiple-value-bind (match-start match-end)
(cl-ppcre:scan b str :start i)
(and match-start (= match-start i)))))))
rules)))
(when begin-rule
(destructuring-bind (b type e e-type) begin-rule
(let ((match-len (if (eq type :string)
(length b)
(length (cl-ppcre:scan-to-strings b str :start i)))))
(push (list i b e e-type) stack)
(incf i match-len)
(setf matched t)))))
;; if no begin delimiter was found, check for any end delimiter.
(unless matched
(let ((end-rule (find-if (lambda (rule)
(destructuring-bind (_ _ e e-type) rule
(cond
((eq e-type :string)
(and (<= (+ i (length e)) len)
(string= e (subseq str i (+ i (length e))))))
((eq e-type :regex)
(multiple-value-bind (match-start match-end)
(cl-ppcre:scan e str :start i)
(and match-start (= match-start i)))))))
rules)))
(when end-rule
;; only record a pair if the top of the stack expects this ending delimiter.
(when (and stack (destructuring-bind (_ b e e-type) (first stack)
(destructuring-bind (_ _ expected-e expected-e-type) end-rule
(and (string= e expected-e) (eq e-type expected-e-type)))))
(let* ((rec (pop stack))
(start (first rec))
(b (second rec))
(e (third rec))
(match-len (if (eq (fourth rec) :string) (length e)
(length (cl-ppcre:scan-to-strings e str :start i)))))
(push (list start (+ i match-len) b e) pairs)))
(incf i (if (eq (fourth end-rule) :string)
(length (third end-rule))
(length (cl-ppcre:scan-to-strings (third end-rule) str :start i))))
(setf matched t))))
(unless matched
(incf i))))
;; return the pairs sorted by the starting index.
(sort pairs #'< :key #'first)))
;; todo: optimize
(defun find-multiple-pairs (str rules &optional (pos 0) predicate)
"find nested delimiter pairs in STR based on RULES.
RULES is a list of (begin type1 end type2 identifier), where:
- begin and end are either strings or regex patterns.
- type1 and type2 are either :string (for normal matching) or :regex (for regex matching).
- identifier is an optional item to identify the rule.
returns a list of quintuples (start-index end-index begin-delim end-delim identifier).
example:
CLTPT> (find-multiple-pairs \"hello(mystr[[hey (i)]]here)\"
'((\"(\" :string \")\" :string \"paren\")
(\"[[\" :string \"]]\" :string \"double-square-bracket\")
(\"\\w+\\(\" :regex \")\" :string \"function-call\")))
=> ((0 27 \"\\w+\\(\" \")\" \"function-call\") (11 22 \"[[\" \"]]\" \"double-square-bracket\") (17 20 \"(\" \")\" \"paren\"))
"
(let ((pairs)
(stack)
(len (length str))
(i pos))
(loop while (< i len) do
(let ((matched nil))
;; check if the current position matches any begin delimiter.
(let ((begin-rule (find-if (lambda (rule)
(destructuring-bind (b type _ _ _) rule
(cond
((eq type :string)
(and (<= (+ i (length b)) len)
(string= b (subseq str i (+ i (length b))))))
((eq type :regex)
(multiple-value-bind (match-start match-end)
(cl-ppcre:scan b str :start i)
(and match-start (= match-start i)))))))
rules)))
(when begin-rule
(destructuring-bind (b type e e-type identifier) begin-rule
(let ((match-len (if (eq type :string)
(length b)
(length (cl-ppcre:scan-to-strings b str :start i)))))
(push (list i b e e-type identifier) stack)
(incf i match-len)
(setf matched t)))))
;; if no begin delimiter was found, check for any end delimiter.
(unless matched
(let ((end-rule (find-if (lambda (rule)
(destructuring-bind (_ _ e e-type _) rule
(cond
((eq e-type :string)
(and (<= (+ i (length e)) len)
(string= e (subseq str i (+ i (length e))))))
((eq e-type :regex)
(multiple-value-bind (match-start match-end)
(cl-ppcre:scan e str :start i)
(and match-start (= match-start i)))))))
rules)))
(when end-rule
;; only record a pair if the top of the stack expects this ending delimiter.
(when (and stack (destructuring-bind (_ b e e-type identifier) (first stack)
(destructuring-bind (_ _ expected-e expected-e-type _) end-rule
(and (string= e expected-e) (eq e-type expected-e-type))))
(or (not predicate)
(funcall predicate b e)))
(let* ((rec (pop stack))
(start (first rec))
(b (second rec))
(e (third rec))
(identifier (fifth rec))
(match-len (if (eq (fourth rec) :string) (length e)
(length (cl-ppcre:scan-to-strings e str :start i)))))
(push (list start (+ i match-len) b e identifier) pairs)))
(incf i (if (eq (fourth end-rule) :string)
(length (third end-rule))
(length (cl-ppcre:scan-to-strings (third end-rule) str :start i))))
(setf matched t))))
(unless matched
(incf i))))
;; return the pairs sorted by the starting index.
(sort pairs #'< :key #'first)))
;; code for 'pair method
;; A
(defun match-at (spec str i)
"attempt to match SPEC at position i in STR.
SPEC is a two-element list: (type pattern) where type is either :string or :regex.
for :string, if the substring starting at i equals the pattern, it returns that match.
if not--and if the pattern does not begin with a colon--it also checks whether the
character at i is a colon and the substring starting at i+1 equals the pattern.
for :regex, it uses cl-ppcre:scan and requires the match to start at i.
returns three values: the match start, the match end, and the matched substring, or nil."
(destructuring-bind (type pattern) spec
(cond
((eq type :string)
(let ((plen (length pattern)))
(cond
((and (<= (+ i plen) (length str))
(string= pattern (subseq str i (+ i plen))))
(values i (+ i plen) (subseq str i (+ i plen))))
((and (< i (length str))
(not (char= (char pattern 0) #\:)) ; only try optional colon if pattern doesn't start with one
(char= (char str i) #\:)
(<= (+ i 1 plen) (length str))
(string= pattern (subseq str (1+ i) (+ i 1 plen))))
(values i (+ i 1 plen) (subseq str i (+ i 1 plen))))
(t nil))))
((eq type :regex)
(multiple-value-bind (m-start m-end)
(cl-ppcre:scan pattern str :start i)
(if (and m-start (= m-start i))
(values m-start m-end (subseq str m-start m-end))
nil)))
(t nil))))
;; A
(defun find-multiple-pairs (str rules &optional (pos 0))
"find nested delimiter pairs in STR based on RULES.
RULES is a list of property lists. each rule has:
:begin -- a two-element list specifying the begin pattern,
e.g. (:regex \":[a-zA-Z]+:\") or (:string \":begin:\")
:end -- a two-element list specifying the end pattern,
e.g. (:string \":end:\") or (:regex \"(?i):end:\")
:predicate -- an optional function taking the begin-match and end-match that returns T if they should be paired.
:id -- an optional identifier.
returns a list of quintuples:
(begin-index end-index begin-match end-match rule-id)."
(let ((pairs)
(stack)
(i pos)
(len (length str)))
(loop while (< i len) do
(if stack
(let* ((top (first stack)) ;; top is of the form (begin-index rule begin-match)
(rule (second top))
(end-spec (getf rule :end)))
(multiple-value-bind (ms me match) (match-at end-spec str i)
(if ms
(if (or (null (getf rule :predicate))
(funcall (getf rule :predicate) (third top) match))
(progn
(push (list (first top) me (third top) match (getf rule :id)) pairs)
(pop stack)
(setf i me))
;; predicate exists and returned false; discard the begin marker.
(pop stack))
(let ((found))
(dolist (r rules found)
(multiple-value-bind (msb meb matchb) (match-at (getf r :begin) str i)
(when msb
(push (list i r matchb) stack)
(setf i meb)
(setf found t)
(return))))
(unless found
(incf i 1))))))
(let ((found))
(dolist (r rules found)
(multiple-value-bind (ms me match) (match-at (getf r :begin) str i)
(when ms
(push (list i r match) stack)
(setf i me)
(setf found t)
(return))))
(unless found
(incf i 1)))))
(sort pairs #'< :key #'first)))
;; code for 'pair method, this is by far the slowest of the bunch
;; replaced with a faster version
;; A
(defun scan-events-for-spec (spec str)
"scan STR for all occurrences of SPEC.
returns a list of events, each as (start-index end-index matched-string)."
(destructuring-bind (type pattern) spec
(cond
((eq type :regex)
(let ((matches (cl-ppcre:all-matches pattern str)))
;; matches is a flat list: (start1 end1 start2 end2 …)
(let ((events))
(loop for (s e) on matches by #'cddr do
(push (list s e (subseq str s e)) events))
(nreverse events))))
((eq type :string)
(let ((direct)
(colon)
(plen (length pattern))
(slen (length str)))
;; scan for direct occurrences.
(loop for i from 0 below slen do
(when (and (<= (+ i plen) slen)
(string= pattern (subseq str i (+ i plen))))
(push (list i (+ i plen) (subseq str i (+ i plen))) direct)))
;; for patterns not starting with colon, also scan for colon-prefixed occurrences.
(unless (char= (char pattern 0) #\:)
(let ((colon-pattern (concatenate 'string ":" pattern))
(cplen (1+ plen)))
(loop for i from 0 below slen do
(when (and (<= (+ i cplen) slen)
(string= colon-pattern (subseq str i (+ i cplen))))
(push (list i (+ i cplen) (subseq str i (+ i cplen))) colon))))
;; merge both lists. If events occur at the same index, choose the one with the smaller end index.
(let ((all (append direct colon)))
(setf all (sort all (lambda (a b)
(if (= (first a) (first b))
(< (second a) (second b))
(< (first a) (first b))))))
all))))
(t nil))))
;; struct to hold per-rule pre-scanned events
(defstruct rule-info
rule ;; the original rule plist
begin-events ;; list of events for the :begin spec
end-events ;; list of events for the :end spec
begin-index ;; pointer into begin-events (integer)
end-index) ;; pointer into end-events (integer)
;; A
(defun find-multiple-pairs (str rules)
"find nested delimiter pairs in STR based on RULES using pre-scanned events.
RULES is a list of plists, each with keys:
:begin -- a two-element spec for the begin marker,
:end -- a two-element spec for the end marker,
:predicate (optional) -- a function of (begin-match end-match),
:id (optional) -- an identifier.
returns a list of quintuples:
(begin-index end-index begin-match end-match rule-id)."
(print rules)
(let* ((slen (length str))
;; build rule-info for each rule.
(rule-infos
(mapcar (lambda (r)
(make-rule-info
:rule r
:begin-events (scan-events-for-spec (getf r :begin) str)
:end-events (scan-events-for-spec (getf r :end) str)
:begin-index 0
:end-index 0))
rules))
(pairs)
;; the stack holds open begin markers. each element is a plist with keys:
;; :event (the begin event, a triple (start end match))
;; :rule-info (the associated rule-info structure)
(stack)
(pos 0))
(loop while (< pos slen) do
(if stack
;; there is an open begin marker.
(let* ((top (first stack))
(ri (getf top :rule-info))
(open-event (getf top :event)) ;; (start end match)
(open-pos (first open-event))
(begin-match (third open-event))
(end-evs (rule-info-end-events ri))
(ei (rule-info-end-index ri)))
(if (and (< ei (length end-evs))
(= (first (nth ei end-evs)) pos))
;; an end event for the top rule is found at pos.
(let ((end-event (nth ei end-evs))
(pred (getf (rule-info-rule ri) :predicate)))
(if (or (null pred)
(funcall pred begin-match (third end-event)))
(progn
(push (list open-pos (second end-event)
begin-match (third end-event)
(getf (rule-info-rule ri) :id))
pairs)
(pop stack)
(setf (rule-info-end-index ri) (1+ ei))
(setf pos (second end-event)))
(pop stack)))
;; no matching end event at pos; try to find a new begin event.
(let ((found))
(dolist (ri rule-infos found)
(let* ((bev (rule-info-begin-events ri))
(bi (rule-info-begin-index ri)))
(when (and (< bi (length bev))
(= (first (nth bi bev)) pos))
(push (list :event (nth bi bev)
:rule-info ri)
stack)
(setf pos (second (nth bi bev)))
(setf (rule-info-begin-index ri) (1+ bi))
(setf found t)
(return))))
(unless found (incf pos 1)))))
;; stack is empty: look for a begin event at pos.
(let ((found))
(dolist (ri rule-infos found)
(let* ((bev (rule-info-begin-events ri))
(bi (rule-info-begin-index ri)))
(when (and (< bi (length bev))
(= (first (nth bi bev)) pos))
(push (list :event (nth bi bev)
:rule-info ri)
stack)
(setf pos (second (nth bi bev)))
(setf (rule-info-begin-index ri) (1+ bi))
(setf found t)
(return))))
(unless found
;; no event at pos; jump to the next available begin event.
(let ((next-pos))
(dolist (ri rule-infos)
(let* ((bev (rule-info-begin-events ri))
(bi (rule-info-begin-index ri)))
(when (< bi (length bev))
(let ((candidate (first (nth bi bev))))
(when (or (null next-pos) (< candidate next-pos))
(setf next-pos candidate))))))
(if next-pos
(setf pos next-pos)
(setf pos slen)))))))
(sort pairs #'< :key #'first)))
;; code for the 'line-region method for regions of consecutive lines sharing a
;; common pattern.
;; A
(defun find-line-regions-matching-regex (text patterns &optional ids)
"finds contiguous regions of lines where each line matches the given regex.
for each pattern in PATTERNS (using the corresponding identifier from IDS, if provided),
this function returns a list of regions as (start-pos end-pos region-text id).
START-POS is the offset of the first line in the region and END-POS is the offset
immediately after the last matching line, while REGION-TEXT is the concatenated
text of all lines in the region."
(let ((matches)
(lines (str:split (string #\newline) text)))
(dotimes (i (length patterns))
(let ((line-pos 0)
(pattern (nth i patterns))
(id (if ids (nth i ids) pattern))
(region-start-pos)
(region-lines))
(loop for j from 0 below (length lines)
for line = (nth j lines)
do (if (cl-ppcre:scan pattern line)
(progn
(unless region-start-pos
(setf region-start-pos line-pos))
(push line region-lines))
(when region-lines
(let ((region-text (format nil "~{~a~^~%~}" (nreverse region-lines))))
(push (list region-start-pos
(+ region-start-pos (length region-text))
region-text
id)
matches))
(setf region-lines nil)
(setf region-start-pos nil)))
(incf line-pos (if (< j (1- (length lines)))
(1+ (length line))
(length line))))
(when region-lines
(let ((region-text (format nil "~{~a~^~%~}" (nreverse region-lines))))
(push (list region-start-pos
(+ region-start-pos (length region-text))
region-text
id)
matches)))))
(nreverse matches)))
(defun flatten (l depth)
"flatten L by at most DEPTH levels."
(and l
(if (zerop depth)
l
(if (atom (car l))
(cons (car l) (flatten (cdr l) depth))
(append (flatten (car l) (1- depth))
(flatten (cdr l) depth))))))

some code for trees i guess

  (defun make-node (interval)
(cons interval nil))
(defun intervals-conflict-p (c-start c-end p-start p-end)
"checks if two intervals (c-start, c-end) and (p-start, p-end)
overlap AND neither contains the other."
(let ((overlap (< (max c-start p-start) (min c-end p-end))))
(when overlap
(let ((c-contains-p (and (<= c-start p-start) (>= c-end p-end)))
(p-contains-c (and (<= p-start c-start) (>= p-end c-end))))
(not (or c-contains-p p-contains-c))))))
(defun build-forest (intervals)
"build a nested tree from a list of intervals, discarding entries
that overlap with a sibling or a root without being contained by it (or containing it).
each interval is a list of the form (start end id)."
(let ((forest)
(stack))
(setf intervals (sort (copy-list intervals) #'< :key #'first))
(dolist (current-interval-data intervals)
(let ((node-to-add (make-node current-interval-data))
(c-start (first current-interval-data))
(c-end (second current-interval-data))
(discard-current))
(loop while (and stack
(let* ((parent-node-on-stack (first stack))
(p-interval-on-stack (car parent-node-on-stack))
(p-start (first p-interval-on-stack))
(p-end (second p-interval-on-stack)))
(not (and (<= p-start c-start) (>= p-end c-end)))))
do (pop stack))
(let ((potential-parent-node (first stack)))
(if potential-parent-node
(dolist (sibling-node (cdr potential-parent-node))
(when (not discard-current)
(let* ((s-interval-data (car sibling-node))
(s-start (first s-interval-data))
(s-end (second s-interval-data)))
(when (intervals-conflict-p c-start c-end s-start s-end)
(setf discard-current t)))))
(dolist (existing-root-node forest)
(when (not discard-current)
(let* ((r-interval-data (car existing-root-node))
(r-start (first r-interval-data))
(r-end (second r-interval-data)))
(when (intervals-conflict-p c-start c-end r-start r-end)
(setf discard-current t)))))))
(unless discard-current
(if (first stack)
(progn
(push node-to-add (cdr (first stack)))
(push node-to-add stack))
(progn
(push node-to-add forest)
(push node-to-add stack))))))
(nreverse forest)))
(defun print-node (node &optional (indent 0))
"recursively prints a node and its children with indentation."
(format t "~v@T~a~%" indent (car node)) ;; print the node's interval with indentation.
(dolist (child (cdr node))
(print-node child (+ indent 2)))) ;; increase indent for children.
(defun print-forest (forest)
"prints all trees in the forest."
(dolist (node forest)
(print-node node 0)))
(defun mapcar-tree (node func)
(cons (funcall func (car node))
(mapcar (lambda (child)
(mapcar-tree child func))
(cdr node))))
(defun mapcar-forest (forest func)
"apply `mapcar-tree' to every node in the forest."
(mapcar (lambda (node)
(mapcar-tree node func))
forest))
(defun map-tree (node func)
(let ((result (mapcar (lambda (child)
(map-tree child func))
(cdr node))))
(cons (funcall func node)
result)))
(defun map-forest (forest func)
"apply map-tree to every node in the forest."
(mapcar (lambda (node)
(map-tree node func))
forest))
(defun tree-mapcar (node func &optional cond)
"we iterate through the tree one NODE at a time and run FUNC on each, COND
decides whether to recurse on a specific node. a tree is returned with nodes
replaced by the results of calling FUNC on them. children are handled first."
(if cond
(if (and (consp node) (funcall cond node))
(cons (funcall func (car node))
(loop for child in (cdr node)
collect (tree-mapcar child func cond)))
(cons (funcall func (car node))
(cdr node)))
(if (consp node)
(cons (funcall func (car node))
(loop for child in (cdr node)
collect (tree-mapcar child func cond)))
node)))
(defun tree-find (node item &key (test #'equal) (key #'identity))
(tree-mapcar
node
(lambda (other-node)
(when (funcall test item (funcall key other-node))
(return-from tree-find other-node))))
nil)