COMP3161/9164 Concepts of Programming Languages
Term 3, 2025

Completed Lexer

This is the completed lexer implementation.

module Lexer where

-- Import character-related functions from the standard library.
import Data.Char

-- | The tokens of a C file, as a datatype with one constructor per kind
-- of token.
data Token
  = LParen          -- ^ Opening parenthesis @(@
  | RParen          -- ^ Closing parenthesis @)@
  | Semi            -- ^ Semicolon @;@
  | LBrace          -- ^ Opening brace @{@
  | RBrace          -- ^ Closing brace @}@
  | Ident String    -- ^ A word, carrying its text
  | Return          -- ^ Keyword token (presumably C's @return@)
  | Number Integer  -- ^ A numeric literal, carrying its value
  | Equals          -- ^ Equals sign @=@
  | Comma           -- ^ Comma @,@
  deriving (Show, Eq)


-- | Increment an integer by one.
add1 :: Integer -> Integer
add1 = (+ 1)


-- my_show :: Token -> String
-- my_show tok = case tok of
--  Return -> "Return"
--  Equals -> "Equals"

-- | Render a token as a string.
--
-- 'Return' is rendered as @"Return"@ and a 'Number' as its bare numeral.
-- Every other token falls back to the derived 'Show' instance, so the
-- function is total instead of failing with a pattern-match error on
-- tokens such as 'Equals' or 'LParen'.
my_show :: Token -> String
my_show Return = "Return"
my_show (Number n) = show n
my_show tok = show tok



-- | A hand-written list type equivalent to the builtin list: a value is
-- either the empty list, or one element followed by the rest of the list.
data My_List a
  = Empty_List
  | Nonempty_List a (My_List a)
  deriving Show


-- | Convert a 'My_List' into the standard list type, keeping the elements
-- in the same order.
my_list_to_list :: My_List a -> [a]
my_list_to_list list = case list of
  Empty_List -> []
  Nonempty_List first others -> first : my_list_to_list others



-- | Convert a standard list into a 'My_List'.
-- Showing the result reveals the real shape of values such as @[1, 2]@
-- and @"3, 4"@.
list_to_my_list :: [a] -> My_List a
list_to_my_list = foldr Nonempty_List Empty_List




-- | A simple lexer: turn a string of source text into a list of tokens.
--
-- Single-character punctuation maps directly to its token and whitespace
-- is skipped.  A digit hands the input over to 'lex_from_digit' and a
-- letter hands it over to 'lex_from_alpha'.  Any other character aborts
-- lexing by calling 'error' with a message naming that character.
lexer :: String -> [Token]
lexer [] = []
lexer input@(c : cs) = case c of
  '{' -> LBrace : lexer cs
  '}' -> RBrace : lexer cs
  '(' -> LParen : lexer cs
  ')' -> RParen : lexer cs
  ';' -> Semi : lexer cs
  ',' -> Comma : lexer cs
  '=' -> Equals : lexer cs
  _
    -- The helpers receive the whole remaining input, including @c@, so
    -- that the first character becomes part of the number or word.
    | isDigit c -> lex_from_digit input
    | isSpace c -> lexer cs
    | isAlpha c -> lex_from_alpha input
    | otherwise -> error ("lexer: no idea what to do with " ++ show c)

-- | Special case of the lexer for input whose first character is a digit:
-- the leading run of digits becomes a 'Number' token, and normal lexing
-- resumes on whatever follows.
lex_from_digit :: String -> [Token]
lex_from_digit str = Number (read numeral) : lexer remainder
  where
    (numeral, remainder) = span isDigit str

-- | Special case of the lexer for input whose first character is
-- alphabetic: the leading run of alphanumeric characters is consumed as
-- one word, and normal lexing resumes on whatever follows.
--
-- The word @return@ becomes the keyword token 'Return'; every other word
-- becomes an 'Ident' carrying its text.
lex_from_alpha :: String -> [Token]
lex_from_alpha str = classify word : lexer rest
  where
    (word, rest) = span isAlphaNum str
    -- Keywords have to be told apart from identifiers here: no other
    -- part of the lexer produces 'Return'.
    classify "return" = Return
    classify name = Ident name

-- | Function composition: @compose f g@ applies @g@ first, then @f@ to
-- the result.
compose :: (b -> c) -> (a -> b) -> a -> c
compose f g = f . g

2025-12-05 Fri 11:50

Announcements RSS