% Aho (1968), Journal of the ACM 15(4). Page range uses the ASCII "--" form;
% the title dash is an em-dash ("---").
@article{aho_indexed_1968,
  author  = {Aho, Alfred V.},
  title   = {Indexed Grammars---An Extension of Context-Free Grammars},
  journal = {Journal of the {ACM}},
  volume  = {15},
  number  = {4},
  pages   = {647--671},
  year    = {1968},
  doi     = {10.1145/321479.321488},
}
% Arjovsky, Shah and Bengio (2016), ICML'16 proceedings paper.
% NOTE(review): the DOI recorded here is the arXiv one, not a proceedings DOI.
@inproceedings{arjovsky_unitary_2016,
  author    = {Arjovsky, Martin and Shah, Amar and Bengio, Yoshua},
  title     = {Unitary Evolution Recurrent Neural Networks},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning - Volume 48},
  series    = {ICML'16},
  publisher = {JMLR.org},
  pages     = {1120--1128},
  year      = {2016},
  doi       = {10.48550/arXiv.1511.06464},
}
% Bernardy (2018), LiLT volume 16. The acronym in the title is brace-protected
% so sentence-casing styles do not lowercase it.
@article{bernardy_can_2018,
  author  = {Bernardy, Jean-Philippe},
  title   = {Can {RNNs} Learn Nested Recursion?},
  journal = {Linguistic Issues in Language Technology},
  volume  = {16},
  year    = {2018},
  doi     = {10.33011/lilt.v16i.1417},
}
% Bernardy, Ek and Maraev (2021), Findings of ACL.
% NOTE(review): no pages or publisher recorded for this entry.
@inproceedings{bernardy_can_2021,
  author    = {Bernardy, Jean-Philippe and Ek, Adam and Maraev, Vladislav},
  title     = {Can the Transformer Learn Nested Recursion with Symbol Masking?},
  booktitle = {Findings of the ACL 2021},
  year      = {2021},
  doi       = {10.18653/v1/2021.findings-acl.67},
}
% Bernardy and Lappin (2017), LiLT 15(2). Journal name is spelled the same way
% as in the other LiLT entry of this file.
% NOTE(review): pages = 15 looks like a page count or article number rather
% than a range, and the DOI suffix is shorter than the sibling LiLT DOI --
% confirm both against the publisher record.
@article{bernardy_using_2017,
  author  = {Bernardy, Jean-Philippe and Lappin, Shalom},
  title   = {Using Deep Neural Networks to Learn Syntactic Agreement},
  journal = {Linguistic Issues in Language Technology},
  volume  = {15},
  number  = {2},
  pages   = {15},
  year    = {2017},
  doi     = {10.33011/lilt.v15i.141},
}
% Bernardy and Lappin (2022), CMCL workshop paper (ACL Anthology).
% NOTE(review): no page range recorded.
@inproceedings{bernardy_neural_2022,
  author    = {Bernardy, Jean-Philippe and Lappin, Shalom},
  title     = {A Neural Model for Compositional Word Embeddings and Sentence Processing},
  booktitle = {Proceedings of the Workshop on Cognitive Modeling and Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  doi       = {10.18653/v1/2022.cmcl-1.2},
  url       = {https://aclanthology.org/2022.cmcl-1.2/},
}
% Coecke, Sadrzadeh and Clark (2010). The journal field holds only the journal
% name; the Festschrift label is kept as a note.
% NOTE(review): no page range recorded; the DOI is the arXiv one.
@article{coecke_mathematical_2010,
  author  = {Coecke, Bob and Sadrzadeh, Mehrnoosh and Clark, Stephen},
  title   = {Mathematical Foundations for a Compositional Distributional Model of Meaning},
  journal = {Linguistic Analysis},
  volume  = {36},
  year    = {2010},
  note    = {Lambek Festschrift},
  doi     = {10.48550/arXiv.1003.4394},
}
% Elman (1990), Cognitive Science 14(2).
@article{elman_finding_1990,
  author  = {Elman, Jeffrey L.},
  title   = {Finding structure in time},
  journal = {Cognitive Science},
  volume  = {14},
  number  = {2},
  pages   = {179--211},
  year    = {1990},
  doi     = {10.1016/0364-0213(90)90002-E},
}
% Elman (1991), Machine Learning 7, double issue 2-3 (entered as a "--" range).
@article{elman_distributed_1991,
  author  = {Elman, Jeffrey L.},
  title   = {Distributed representations, simple recurrent networks, and grammatical structure},
  journal = {Machine Learning},
  volume  = {7},
  number  = {2--3},
  pages   = {195--225},
  year    = {1991},
  doi     = {10.1007/BF00114844},
}
% Grefenstette et al. (2011), IWCS 2011 paper; identified by URL only.
% NOTE(review): no pages, publisher or DOI recorded.
@inproceedings{grefenstette_concrete_2011,
  author    = {Grefenstette, Edward and Sadrzadeh, Mehrnoosh and Clark, Stephen and Coecke, Bob and Pulman, Stephen},
  title     = {Concrete Sentence Spaces for Compositional Distributional Models of Meaning},
  booktitle = {Proceedings of the Ninth International Conference on Computational Semantics ({IWCS} 2011)},
  year      = {2011},
  url       = {https://aclanthology.org/W11-0114},
}
% Gulordava et al. (2018), NAACL-HLT 2018 long paper.
@inproceedings{gulordava_colorless_2018,
  author    = {Gulordava, Kristina and Bojanowski, Piotr and Grave, Edouard and Linzen, Tal and Baroni, Marco},
  title     = {Colorless Green Recurrent Networks Dream Hierarchically},
  booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {New Orleans, Louisiana},
  pages     = {1195--1205},
  year      = {2018},
  doi       = {10.18653/v1/N18-1108},
}
% Hewitt et al. (2020). The DOI (2020.emnlp-main.156) identifies the EMNLP 2020
% proceedings version, so the entry is typed as a conference paper; the arXiv
% identifier formerly embedded in the journal field is kept in eprint fields.
% NOTE(review): no page range recorded.
@inproceedings{hewitt_rnns_2020,
  author        = {Hewitt, John and Hahn, Michael and Ganguli, Surya and Liang, Percy and Manning, Christopher D.},
  title         = {{RNNs} can generate bounded hierarchical languages with optimal memory},
  booktitle     = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
  publisher     = {Association for Computational Linguistics},
  year          = {2020},
  doi           = {10.18653/v1/2020.emnlp-main.156},
  eprint        = {2010.07515},
  archiveprefix = {arXiv},
}
% Hochreiter and Schmidhuber (1997), Neural Computation 9(8).
@article{hochreiter_long_1997,
  author  = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title   = {Long short-term memory},
  journal = {Neural Computation},
  volume  = {9},
  number  = {8},
  pages   = {1735--1780},
  year    = {1997},
  doi     = {10.1162/neco.1997.9.8.1735},
}
% Hyland and Raetsch (2017), AAAI-17 paper.
% NOTE(review): no page range recorded.
@inproceedings{hyland_learning_2017,
  author    = {Hyland, Stephanie L. and R{\"a}tsch, Gunnar},
  title     = {Learning unitary operators with help from {u(n)}},
  booktitle = {Proceedings of the Thirty-First {AAAI} Conference on Artificial Intelligence},
  year      = {2017},
  doi       = {10.1609/aaai.v31i1.10928},
}
% Jing et al. (2017), ICML'17 proceedings paper. Accented letters are written
% as LaTeX escapes, matching the other accented names in this file, and the
% page range uses the ASCII "--" form.
% NOTE(review): no DOI or URL recorded.
@inproceedings{jing_tunable_2017,
  author    = {Jing, Li and Shen, Yichen and Dub{\v{c}}ek, Tena and Peurifoy, John and Skirlo, Scott and LeCun, Yann and Tegmark, Max and Solja{\v{c}}i{\'c}, Marin},
  title     = {Tunable Efficient Unitary Neural Networks ({EUNN}) and their application to {RNNs}},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning - Volume 70},
  series    = {ICML'17},
  publisher = {JMLR.org},
  pages     = {1733--1741},
  year      = {2017},
}
% Joshi, Vijay-Shanker and Weir (1990), University of Pennsylvania technical
% report. The second author's name is hyphenated so it is kept as one unit.
% NOTE(review): the citation key contains an ampersand, which some tools
% reject; it is left unchanged because documents cite it by this key.
% NOTE(review): no report number recorded.
@techreport{Joshi&etal1990,
  author      = {Joshi, Aravind K. and Vijay-Shanker, K. and Weir, David},
  title       = {The Convergence of Mildly Context-Sensitive Grammar Formalisms},
  type        = {Technical Report},
  institution = {Department of Computer and Information Science, University of Pennsylvania},
  address     = {Philadelphia, PA},
  year        = {1990},
}
% Kiani et al. (2022), arXiv preprint. The mixed-case method name in the title
% is brace-protected; the arXiv identifier is also given in eprint fields.
@misc{kiani_projunn_2022,
  author        = {Kiani, Bobak and Balestriero, Randall and LeCun, Yann and Lloyd, Seth},
  title         = {{projUNN}: efficient method for training deep networks with unitary matrices},
  year          = {2022},
  eprint        = {2203.05483},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2203.05483},
  url           = {https://arxiv.org/pdf/2203.05483.pdf},
}
% Kirov and Frank (2012), Connection Science 24(1). The acronym in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@article{kirov_processing_2012,
  author  = {Kirov, Christo and Frank, Robert},
  title   = {Processing of nested and cross-serial dependencies: an automaton perspective on {SRN} behaviour},
  journal = {Connection Science},
  volume  = {24},
  number  = {1},
  pages   = {1--24},
  year    = {2012},
  doi     = {10.1080/09540091.2011.641939},
}
% Lambek (2008), Journal of Logic, Language and Information 17.
% NOTE(review): no issue number recorded.
@article{lambek_pregroup_2008,
  author  = {Lambek, Joachim},
  title   = {Pregroup Grammars and {Chomsky}'s Earliest Examples},
  journal = {Journal of Logic, Language and Information},
  volume  = {17},
  pages   = {141--160},
  year    = {2008},
  doi     = {10.1007/s10849-007-9053-2},
}
% Lappin (2021), monograph published by CRC Press.
@book{lappin_deep_2021,
  author    = {Lappin, Shalom},
  title     = {Deep Learning and Linguistic Representation},
  publisher = {CRC Press, Taylor \& Francis},
  address   = {Boca Raton, London, New York},
  year      = {2021},
  doi       = {10.1201/9781003127086},
}
% Linzen, Dupoux and Goldberg (2016), TACL volume 4.
% The DOI is stored bare (no LaTeX escapes) so it resolves when used as a link.
% NOTE(review): a bibliography style that prints the DOI as plain text, not via
% a url-aware macro, will need the underscores handled at the style level.
@article{linzen_assessing_2016,
  author  = {Linzen, Tal and Dupoux, Emmanuel and Goldberg, Yoav},
  title   = {Assessing the Ability of {LSTM}s to Learn Syntax-Sensitive Dependencies},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {4},
  pages   = {521--535},
  year    = {2016},
  doi     = {10.1162/tacl_a_00115},
}
% McPheat et al. (2021), EPTCS volume 333. The proper noun in the title is
% brace-protected so sentence-casing styles do not lowercase it.
@article{mcpheat_categorical_2021,
  author  = {McPheat, Lachlan and Sadrzadeh, Mehrnoosh and Wazni, Hadi and Wijnholds, Gijs},
  title   = {Categorical Vector Space Semantics for {Lambek} Calculus with a Relevant Modality (Extended Abstract)},
  journal = {Electronic Proceedings in Theoretical Computer Science},
  volume  = {333},
  pages   = {168--182},
  year    = {2021},
  doi     = {10.4204/EPTCS.333.12},
}
% Pulman and Ritchie (1985), University of East Anglia technical report no. 23.
@techreport{pulman_indexed_1985,
  author      = {Pulman, Stephen and Ritchie, G. D.},
  title       = {Indexed Grammars and Intersecting Dependencies},
  type        = {Technical Report},
  number      = {23},
  institution = {University of East Anglia},
  year        = {1985},
}
% Sennhauser and Berwick (2018), BlackboxNLP workshop paper.
@inproceedings{sennhauser_evaluating_2018,
  author    = {Sennhauser, Luzi and Berwick, Robert},
  title     = {Evaluating the Ability of {LSTM}s to Learn Context-Free Grammars},
  booktitle = {Proceedings of the 2018 {EMNLP} Workshop {B}lackbox{NLP}: Analyzing and Interpreting Neural Networks for {NLP}},
  publisher = {Association for Computational Linguistics},
  address   = {Brussels, Belgium},
  pages     = {115--124},
  year      = {2018},
  doi       = {10.18653/v1/W18-5414},
}
% Shieber (1985), Linguistics and Philosophy 8(3).
@article{shieber_evidence_1985,
  author  = {Shieber, Stuart M.},
  title   = {Evidence against the context-freeness of natural language},
  journal = {Linguistics and Philosophy},
  volume  = {8},
  number  = {3},
  pages   = {333--343},
  year    = {1985},
  doi     = {10.1007/BF00630917},
}
% Stabler (2004), Cognitive Science 28(5). Volume and issue agree with the DOI
% suffix (cog2805). The DOI is stored bare, without LaTeX escapes.
@article{stabler_varieties_2004,
  author  = {Stabler, Edward P.},
  title   = {Varieties of crossing dependencies: Structure dependence and mild context sensitivity},
  journal = {Cognitive Science},
  volume  = {28},
  number  = {5},
  pages   = {699--720},
  year    = {2004},
  doi     = {10.1207/s15516709cog2805_4},
}
% Steedman (2000), monograph published by MIT Press.
@book{Steedman2000,
  author    = {Steedman, Mark},
  title     = {The Syntactic Process},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2000},
}
% Sutskever, Martens and Hinton (2011), ICML 2011 paper; identified by URL.
@inproceedings{sutskever_generating_2011,
  author    = {Sutskever, Ilya and Martens, James and Hinton, Geoffrey E.},
  title     = {Generating Text with Recurrent Neural Networks},
  editor    = {Getoor, Lise and Scheffer, Tobias},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning, {ICML} 2011, Bellevue, Washington, USA, June 28 - July 2, 2011},
  publisher = {Omnipress},
  pages     = {1017--1024},
  year      = {2011},
  url       = {https://icml.cc/2011/papers/524\_icmlpaper.pdf},
}
% Wijnholds, Sadrzadeh and Clark (2020), CoNLL 2020 paper.
@inproceedings{wijnholds_representation_2020,
  author    = {Wijnholds, Gijs and Sadrzadeh, Mehrnoosh and Clark, Stephen},
  title     = {Representation Learning for Type-Driven Composition},
  booktitle = {Proceedings of the 24th Conference on Computational Natural Language Learning},
  publisher = {Association for Computational Linguistics},
  pages     = {313--324},
  year      = {2020},
  doi       = {10.18653/v1/2020.conll-1.24},
}
% Wisdom et al. (2016), NeurIPS volume 29, typed as a proceedings paper.
% "Le Roux" is entered in "Last, First" form so the two-word surname is not
% split by BibTeX's name parser.
% NOTE(review): the DOI recorded here is the arXiv one.
@inproceedings{wisdom_full-capacity_2016,
  author    = {Wisdom, Scott and Powers, Thomas and Hershey, John and Le Roux, Jonathan and Atlas, Les},
  title     = {Full-capacity unitary recurrent neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {29},
  pages     = {4880--4888},
  year      = {2016},
  doi       = {10.48550/arXiv.1611.00035},
}
% Yu, Vu and Kuhn (2019), BlackboxNLP workshop paper. The proper noun and the
% mixed-case model name in the title are brace-protected against recasing.
% NOTE(review): no publisher recorded.
@inproceedings{yu_learning_2019,
  author    = {Yu, Xiang and Vu, Ngoc Thang and Kuhn, Jonas},
  title     = {Learning the {Dyck} language with attention-based {Seq2Seq} models},
  booktitle = {Proceedings of the 2019 {ACL} Workshop {BlackboxNLP}: Analyzing and Interpreting Neural Networks for {NLP}},
  pages     = {138--146},
  year      = {2019},
  doi       = {10.18653/v1/W19-4815},
}