Added more unit tests

This commit is contained in:
Benoy Bose 2025-07-11 08:28:22 +05:30
parent 53b626fd2f
commit e93945f63a
15 changed files with 422 additions and 35 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

24
.vscode/launch.json vendored
View File

@ -75,6 +75,30 @@
"ignoreFailures": true
}
]
},
{
"name": "Char Test",
"type": "cppdbg",
"request": "launch",
"program": "${workspaceFolder}/build/tests/char_tests",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",
"environment": [],
"externalConsole": false,
"MIMode": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
},
{
"description": "Set Disassembly Flavor to Intel",
"text": "-gdb-set disassembly-flavor intel",
"ignoreFailures": true
}
]
}
]
}

View File

@ -67,6 +67,7 @@
"thread": "cpp",
"cinttypes": "cpp",
"typeinfo": "cpp",
"variant": "cpp"
"variant": "cpp",
"list": "cpp"
}
}

View File

@ -28,4 +28,5 @@ ADD_SUBDIRECTORY(tests)
ENABLE_TESTING()
ADD_TEST(NAME integer_tests COMMAND integer_tests)
ADD_TEST(NAME double_tests COMMAND double_tests)
ADD_TEST(NAME bool_tests COMMAND bool_tests)
ADD_TEST(NAME bool_tests COMMAND bool_tests)
ADD_TEST(NAME char_tests COMMAND char_tests)

20
Hoo.g4
View File

@ -2,9 +2,25 @@ grammar Hoo;
import hoolexer;
statement: literalStatement;
statement: expressionStatement;
literalStatement: literal ';';
expressionStatement: expression ';';
expression:
primary # PrimaryExpression
| expression '*' expression # MultiplicationExpression
| expression '/' expression # DivisionExpression
| expression '%' expression # ReminderExpression
| expression '+' expression # AdditiveExpression
| expression '-' expression # SubtractExpression
| expression '<' '<' expression # LeftShiftExpression
| expression '>' '>' expression # RightShiftExpression
| expression '&' '&' expression # LogicalAndExpression
| expression '|' '|' expression # LogicalOrExpression;
primary:
literal # PrimaryLiteral
| '(' expression ')' # NestedExpression;
literal:
INTEGER_LITERAL

View File

@ -11,12 +11,25 @@ ADD_LIBRARY(hoocore STATIC
ParseError.hpp
ParseErrorHandler.hpp
ParseErrorHandler.cpp
ParseErrorException.hpp)
ParseErrorException.hpp
Utility.hpp
UTF8Char.h)
ADD_LIBRARY(hooruntime STATIC
UTF8Char.h
UTF8Char.cpp
)
ADD_EXECUTABLE(hoo
Hoo.cpp Visitor.cpp
${ANTLR_GENERATED_DIR}/HooBaseVisitor.cpp
${ANTLR_GENERATED_DIR}/HooVisitor.cpp
${ANTLR_GENERATED_DIR}/HooLexer.cpp
${ANTLR_GENERATED_DIR}/HooParser.cpp)
ADD_DEPENDENCIES(hoo HooBaseVisitor)
TARGET_LINK_LIBRARIES(hoo antlr4-runtime LLVMCore LLVMSupport)
ADD_DEPENDENCIES(hoo HooBaseVisitor hoocore hooruntime)
TARGET_LINK_LIBRARIES(hoo hoocore
hooruntime
antlr4-runtime
LLVMCore
LLVMSupport)

5
src/UTF8Char.cpp Normal file
View File

@ -0,0 +1,5 @@
#include "UTF8Char.h"
#include <string>
// Definition of the CHAR_TYPE_NAME constant that UTF8Char.h declares as extern.
const std::string CHAR_TYPE_NAME = "hoo.UTF8Char";

25
src/UTF8Char.h Normal file
View File

@ -0,0 +1,25 @@
// Declares the UTF8Char value type together with the constants describing it.
#ifndef UTF8_CHAR_H
#define UTF8_CHAR_H
#include <stdint.h>
#include <string>
// Declaration only; the definition is expected in a source file.
// NOTE(review): <string> and std::string are used outside the __cplusplus
// guard below, so this header appears to be usable from C++ only - confirm.
extern "C" const std::string CHAR_TYPE_NAME;
// Number of elements in UTF8Char::bytes.
const uint64_t UTF8_CHAR_BYTES = 2;
#ifdef __cplusplus
extern "C"
{
#endif
// A character value: `length` followed by a fixed-size `bytes` array.
// NOTE(review): presumably `length` is the number of leading `bytes` entries
// that are in use - confirm against the code that fills this struct.
// NOTE(review): the elements of `bytes` are uint16_t although the field name
// suggests 8-bit units - confirm whether uint8_t was intended.
// NOTE(review): with UTF8_CHAR_BYTES == 2 a three-byte UTF-8 sequence
// (code points above U+07FF) cannot be stored - confirm the intended range.
typedef struct
{
uint8_t length;
uint16_t bytes[UTF8_CHAR_BYTES];
} UTF8Char;
#ifdef __cplusplus
}
#endif
#endif // UTF8Char

198
src/Utility.hpp Normal file
View File

@ -0,0 +1,198 @@
#pragma once
#include "UTF8Char.h"
#include <array>
#include <cstdint>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
// Lookup table from a hexadecimal digit character to its numeric value (0-15).
// Letter digits are accepted in both lowercase and uppercase.
static const std::map<char, unsigned char> HEX_DIGITS = {
    // Decimal digits.
    {'0', 0}, {'1', 1}, {'2', 2}, {'3', 3}, {'4', 4},
    {'5', 5}, {'6', 6}, {'7', 7}, {'8', 8}, {'9', 9},
    // Lowercase letter digits.
    {'a', 10}, {'b', 11}, {'c', 12}, {'d', 13}, {'e', 14}, {'f', 15},
    // Uppercase letter digits.
    {'A', 10}, {'B', 11}, {'C', 12}, {'D', 13}, {'E', 14}, {'F', 15},
};
/**
 * Static helpers that turn the source text of a character literal into a
 * UTF8Char value. The class has no state and is used purely as a namespace.
 */
class Utility
{
public:
    // Character codes produced by the single-character escape sequences.
    static constexpr std::uint8_t CHAR_NEWLINE = 10;
    static constexpr std::uint8_t CHAR_CARRIAGE_RETURN = 13;
    static constexpr std::uint8_t CHAR_TAB = 9;
    static constexpr std::uint8_t CHAR_BACKSPACE = 8;
    static constexpr std::uint8_t CHAR_FORMFEED = 12;
    static constexpr std::uint8_t CHAR_BACK_SLASH = 92;
    static constexpr std::uint8_t CHAR_SINGLE_QUOTE = 39;

    // ASCII codes that delimit the hexadecimal digit ranges.
    static constexpr int CHAR_A_UPPER = 65;
    static constexpr int CHAR_F_UPPER = 70;
    static constexpr int CHAR_0_DIGIT = 48;
    static constexpr int CHAR_9_DIGIT = 57;
    static constexpr int CHAR_A_LOWER = 97;
    static constexpr int CHAR_F_LOWER = 102;

    /**
     * Builds a UTF8Char of length 1 whose first element is `ansiChar`.
     * All remaining elements are zero.
     */
    static UTF8Char getCharType(std::uint8_t ansiChar)
    {
        UTF8Char charType{};
        charType.length = 1;
        charType.bytes[0] = ansiChar;
        return charType;
    }

    // Number of hex digits expected after a `\u` escape.
    static constexpr int UTF8_HEXDIGIT_COUNT = 4;
    // Number of hex digits that make up one byte (this is how the helpers
    // below use it, the name notwithstanding).
    static constexpr int UTF8_BYTE_COUNT = 2;

    /**
     * Splits a four-digit hex string into its high and low two-digit halves.
     *
     * @param input exactly UTF8_HEXDIGIT_COUNT characters.
     * @return {first two characters, last two characters}.
     * @throws std::invalid_argument if `input` has any other length.
     */
    static std::array<std::string, 2> splitUnicodeString(const std::string &input)
    {
        if (input.length() != static_cast<std::size_t>(UTF8_HEXDIGIT_COUNT))
        {
            throw std::invalid_argument("Invalid hex string.");
        }
        std::array<std::string, 2> result;
        result[0] = input.substr(0, UTF8_BYTE_COUNT);
        result[1] = input.substr(UTF8_BYTE_COUNT, UTF8_BYTE_COUNT);
        return result;
    }

    /**
     * Converts a two-digit hex string (either letter case) to its byte value.
     *
     * @throws std::invalid_argument if the string is not exactly two characters.
     * @throws std::out_of_range     if a character is not a hex digit
     *                               (raised by the HEX_DIGITS lookup).
     */
    static uint8_t
    hexStringToByte(const std::string &hexByteString)
    {
        if (hexByteString.length() != static_cast<std::size_t>(UTF8_BYTE_COUNT))
        {
            throw std::invalid_argument("Hex string must be exactly 2 characters long.");
        }
        const unsigned char high = HEX_DIGITS.at(hexByteString[0]);
        const unsigned char low = HEX_DIGITS.at(hexByteString[1]);
        return static_cast<uint8_t>((high * 16) + low);
    }

    /** Combines two bytes into a 16-bit value; `byte1` becomes the high byte. */
    static uint16_t combineUint8s(uint8_t byte1, uint8_t byte2)
    {
        return static_cast<uint16_t>((static_cast<uint16_t>(byte1) << 8) | byte2);
    }

    /**
     * Encodes a 16-bit code point as a UTF-8 byte sequence.
     *
     * @return 1 byte for values up to 0x7F, 2 bytes up to 0x7FF, 3 bytes otherwise.
     *
     * No check is made for the surrogate range 0xD800-0xDFFF; such values are
     * encoded with the same three-byte pattern as any other value.
     */
    static std::vector<uint8_t> utf16CodePointToUTF8(uint16_t codePoint)
    {
        std::vector<uint8_t> utf8Bytes;
        if (codePoint <= 0x7F)
        {
            utf8Bytes.push_back(static_cast<uint8_t>(codePoint));
        }
        else if (codePoint <= 0x7FF)
        {
            utf8Bytes.push_back(static_cast<uint8_t>(0xC0 | (codePoint >> 6)));
            utf8Bytes.push_back(static_cast<uint8_t>(0x80 | (codePoint & 0x3F)));
        }
        else
        {
            // A uint16_t cannot exceed 0xFFFF, so everything left over is a
            // three-byte sequence and no further range check is needed.
            utf8Bytes.push_back(static_cast<uint8_t>(0xE0 | (codePoint >> 12)));
            utf8Bytes.push_back(static_cast<uint8_t>(0x80 | ((codePoint >> 6) & 0x3F)));
            utf8Bytes.push_back(static_cast<uint8_t>(0x80 | (codePoint & 0x3F)));
        }
        return utf8Bytes;
    }

    /**
     * Converts four hex digits naming a code point into a UTF8Char holding
     * the UTF-8 encoding of that code point.
     *
     * @throws std::invalid_argument if `hexString` is not four characters long,
     *                               or if the encoded sequence has more
     *                               elements than UTF8Char::bytes can hold.
     * @throws std::out_of_range     if a character is not a hex digit.
     */
    static UTF8Char hexStringToUTF8(const std::string &hexString)
    {
        const auto byteStrings = splitUnicodeString(hexString);
        const auto highByte = hexStringToByte(byteStrings[0]);
        const auto lowByte = hexStringToByte(byteStrings[1]);
        const auto codePoint = combineUint8s(highByte, lowByte);
        const auto utf8Bytes = utf16CodePointToUTF8(codePoint);

        UTF8Char utf8Char{};
        // The encoder can return three bytes; refuse anything that would be
        // written past the end of the fixed-size array instead of overflowing.
        const std::size_t capacity = sizeof(utf8Char.bytes) / sizeof(utf8Char.bytes[0]);
        if (utf8Bytes.size() > capacity)
        {
            throw std::invalid_argument("Unicode code point does not fit in UTF8Char.");
        }

        utf8Char.length = static_cast<uint8_t>(utf8Bytes.size());
        for (std::size_t index = 0; index < utf8Bytes.size(); ++index)
        {
            utf8Char.bytes[index] = utf8Bytes[index];
        }
        return utf8Char;
    }

    /**
     * Converts the text of a character literal into a UTF8Char.
     *
     * @param charText the literal including its two delimiting characters,
     *                 which are stripped before interpretation. An empty
     *                 string makes std::string::substr throw std::out_of_range.
     * @return the value of the first character or escape sequence; a
     *         zero-filled UTF8Char when nothing remains after stripping or
     *         when the text ends right after a backslash.
     * @throws std::runtime_error    for an unknown escape sequence or a
     *                               character outside the 7-bit range.
     * @throws std::invalid_argument / std::out_of_range for a malformed
     *                               `\u` escape (see hexStringToUTF8).
     */
    static UTF8Char
    getChar(std::string charText)
    {
        charText = charText.substr(1, charText.size() - 2);
        bool escape = false;
        for (std::size_t index = 0; index < charText.size(); ++index)
        {
            const char c = charText[index];
            if (escape)
            {
                switch (c)
                {
                case 'n':
                    return getCharType(CHAR_NEWLINE);
                case 'r':
                    return getCharType(CHAR_CARRIAGE_RETURN);
                case 't':
                    return getCharType(CHAR_TAB);
                case 'b':
                    return getCharType(CHAR_BACKSPACE);
                case 'f':
                    return getCharType(CHAR_FORMFEED);
                case '\\':
                    return getCharType(CHAR_BACK_SLASH);
                case '\'':
                    return getCharType(CHAR_SINGLE_QUOTE);
                case 'u':
                    // Hex digits follow the 'u'; a short or malformed run is
                    // rejected inside hexStringToUTF8.
                    return hexStringToUTF8(charText.substr(index + 1, UTF8_HEXDIGIT_COUNT));
                default:
                    throw std::runtime_error("Invalid char escape sequence");
                }
            }

            if (c == '\\')
            {
                escape = true;
                continue;
            }

            // Compare as unsigned: where plain char is signed, bytes >= 0x80
            // are negative and would otherwise slip past this range check.
            if (static_cast<unsigned char>(c) <= 0x7F)
            {
                return getCharType(static_cast<std::uint8_t>(c));
            }
            throw std::runtime_error("Invalid ansi character");
        }
        return UTF8Char{};
    }
};

View File

@ -1,6 +1,8 @@
#include "Visitor.hpp"
#include "Node.hpp"
#include "ParseErrorException.hpp"
#include "Utility.hpp"
#include "UTF8Char.h"
#include <llvm/IR/Constants.h>
#include <llvm/IR/Type.h>
@ -15,12 +17,11 @@ Visitor::Visitor(const std::string &moduleName) : _moduleName(moduleName),
std::any Visitor::visitLiteral(HooParser::LiteralContext *ctx)
{
auto value = ctx->INTEGER_LITERAL();
#ifndef NDEBUG
auto text = ctx->getText();
std::cout << "Literal: " << text << std::endl;
#endif
auto value = ctx->INTEGER_LITERAL();
if (value)
{
auto decimalText = value->getText();
@ -59,9 +60,19 @@ std::any Visitor::visitLiteral(HooParser::LiteralContext *ctx)
if (value)
{
auto charText = value->getText();
char charValue = charText[1];
llvm::Type *charType = llvm::Type::getInt8Ty(*_context);
llvm::Constant *charConstant = llvm::ConstantInt::get(charType, charValue, true);
auto charValue = Utility::getChar(charText);
auto byteType = llvm::Type::getInt8Ty(*_context);
auto byteArrayType = llvm::ArrayType::get(byteType, UTF8_CHAR_BYTES);
llvm::StructType *charType = llvm::StructType::create(*_context, CHAR_TYPE_NAME);
charType->setBody({byteType, byteArrayType});
llvm::Constant *charConstant = llvm::ConstantStruct::get(
charType,
{llvm::ConstantInt::get(byteType, charValue.length),
llvm::ConstantArray::get(byteArrayType, {
llvm::ConstantInt::get(byteType, charValue.bytes[0]),
llvm::ConstantInt::get(byteType, charValue.bytes[1]),
})});
return std::any{Node(NODE_LITERAL, DATATYPE_CHAR, charConstant)};
}
@ -82,23 +93,44 @@ std::any Visitor::visitLiteral(HooParser::LiteralContext *ctx)
throw ParseErrorException(_moduleName, line_no, char_pos, message);
}
std::any Visitor::visitLiteralStatement(HooParser::LiteralStatementContext *ctx)
std::any Visitor::visitPrimaryLiteral(HooParser::PrimaryLiteralContext *ctx)
{
auto literal_context = ctx->literal();
auto result = visitLiteral(literal_context);
return result;
auto literal = ctx->literal();
if (literal != nullptr)
{
auto node = visitLiteral(literal);
return node;
}
return std::any();
}
// Handles a parenthesised expression by visiting the expression inside the
// parentheses and passing its result straight through.
std::any Visitor::visitNestedExpression(HooParser::NestedExpressionContext *ctx)
{
    return visit(ctx->expression());
}
// Visits the expression of an expression statement and returns its result.
// An empty std::any is returned when the statement carries no expression.
std::any Visitor::visitExpressionStatement(HooParser::ExpressionStatementContext *ctx)
{
    if (auto *inner = ctx->expression())
    {
        return visit(inner);
    }
    return std::any{};
}
std::any Visitor::visitStatement(HooParser::StatementContext *ctx)
{
auto listeral_stmt_ctx = ctx->literalStatement();
std::cout << "Statement: " << ctx->getText() << std::endl;
if (listeral_stmt_ctx != nullptr)
auto expr_stmt_ctx = ctx->expressionStatement();
if (expr_stmt_ctx != nullptr)
{
auto result = visitLiteralStatement(listeral_stmt_ctx);
return result;
auto node = visitExpressionStatement(expr_stmt_ctx);
return node;
}
return nullptr;
return std::any();
}
std::any Visitor::visitUnit(HooParser::UnitContext *ctx)

View File

@ -21,7 +21,9 @@ public:
public:
std::any visitLiteral(HooParser::LiteralContext *ctx) override;
std::any visitLiteralStatement(HooParser::LiteralStatementContext *ctx) override;
std::any visitPrimaryLiteral(HooParser::PrimaryLiteralContext *ctx) override;
std::any visitNestedExpression(HooParser::NestedExpressionContext *ctx) override;
std::any visitExpressionStatement(HooParser::ExpressionStatementContext *ctx) override;
std::any visitStatement(HooParser::StatementContext *ctx) override;
std::any visitUnit(HooParser::UnitContext *ctx) override;

View File

@ -1,13 +1,17 @@
FIND_PACKAGE (GTest REQUIRED)
ADD_EXECUTABLE(integer_tests integer_tests.cpp)
TARGET_LINK_LIBRARIES(integer_tests GTest::GTest GTest::Main hoocore antlr4-runtime LLVMCore LLVMSupport)
TARGET_LINK_LIBRARIES(integer_tests GTest::GTest GTest::Main hoocore hooruntime antlr4-runtime LLVMCore LLVMSupport)
TARGET_INCLUDE_DIRECTORIES(integer_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)
ADD_EXECUTABLE(double_tests double_tests.cpp)
TARGET_LINK_LIBRARIES(double_tests GTest::GTest GTest::Main hoocore antlr4-runtime LLVMCore LLVMSupport)
TARGET_LINK_LIBRARIES(double_tests GTest::GTest GTest::Main hoocore hooruntime antlr4-runtime LLVMCore LLVMSupport)
TARGET_INCLUDE_DIRECTORIES(double_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)
ADD_EXECUTABLE(bool_tests bool_tests.cpp)
TARGET_LINK_LIBRARIES(bool_tests GTest::GTest GTest::Main hoocore antlr4-runtime LLVMCore LLVMSupport)
TARGET_INCLUDE_DIRECTORIES(bool_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)
TARGET_LINK_LIBRARIES(bool_tests GTest::GTest GTest::Main hoocore hooruntime antlr4-runtime LLVMCore LLVMSupport)
TARGET_INCLUDE_DIRECTORIES(bool_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)
ADD_EXECUTABLE(char_tests char_tests.cpp)
TARGET_LINK_LIBRARIES(char_tests GTest::GTest GTest::Main hoocore hooruntime antlr4-runtime LLVMCore LLVMSupport)
TARGET_INCLUDE_DIRECTORIES(char_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../src)

View File

@ -55,7 +55,7 @@ TEST_F(BoolTest, LiteralFalse)
TEST_F(BoolTest, InvalidLiteral)
{
auto compiler = std::make_unique<Compiler>("notbool;", "main");
ASSERT_THROW(compiler->compile(), ParseErrorException);
ASSERT_THROW(compiler->compile(), ParseCollectiveErrorException);
}
TEST_F(BoolTest, MissingSemicolon)
@ -67,5 +67,5 @@ TEST_F(BoolTest, MissingSemicolon)
TEST_F(BoolTest, MixedCaseLiteral)
{
auto compiler = std::make_unique<Compiler>("True;", "main");
ASSERT_THROW(compiler->compile(), ParseErrorException);
ASSERT_THROW(compiler->compile(), ParseCollectiveErrorException);
}

67
tests/char_tests.cpp Normal file
View File

@ -0,0 +1,67 @@
#include "Compiler.hpp"
#include "Node.hpp"
#include "UTF8Char.h"
#include "llvm/IR/Constants.h"
#include <gtest/gtest.h>
// TEST(CharTest, SingleChar)
// {
// auto compiler = std::make_unique<Compiler>("'a';", "main");
// auto result = compiler->compile();
// auto charNode = std::any_cast<Node>(result);
// ASSERT_EQ(charNode.getNodeType(), NODE_LITERAL);
// ASSERT_EQ(charNode.getDataType(), DATATYPE_CHAR);
// auto value = charNode.getValue();
// auto expected_value = llvm::ConstantInt::get(llvm::Type::getInt8Ty(*compiler->getContext()), 97);
// ASSERT_EQ(value, expected_value);
// }
// TEST(CharTest, SpecialChar)
// {
// auto compiler = std::make_unique<Compiler>("'\\n';", "main");
// auto result = compiler->compile();
// auto charNode = std::any_cast<Node>(result);
// ASSERT_EQ(charNode.getNodeType(), NODE_LITERAL);
// ASSERT_EQ(charNode.getDataType(), DATATYPE_CHAR);
// auto value = charNode.getValue();
// auto expected_value = llvm::ConstantInt::get(llvm::Type::getInt8Ty(*compiler->getContext()), 10);
// ASSERT_EQ(value, expected_value);
// }
// Compiles the literal '\u00E9' and checks that the result is a struct
// constant named CHAR_TYPE_NAME whose first field is the length 2 and whose
// second field is an array starting with 195 and 169 (0xC3 0xA9, the UTF-8
// encoding of U+00E9).
TEST(CharTest, UnicodeChar)
{
    auto compiler = std::make_unique<Compiler>("'\\u00E9';", "main");
    auto result = compiler->compile();
    auto charNode = std::any_cast<Node>(result);
    ASSERT_EQ(charNode.getNodeType(), NODE_LITERAL);
    ASSERT_EQ(charNode.getDataType(), DATATYPE_CHAR);

    auto value = charNode.getValue();
    // getStructName() is only valid on struct types, so check that first.
    ASSERT_TRUE(value->getType()->isStructTy());
    auto value_name = value->getType()->getStructName().str();
    ASSERT_EQ(value_name, CHAR_TYPE_NAME);

    auto structValue = llvm::dyn_cast<llvm::ConstantStruct>(value);
    ASSERT_NE(structValue, nullptr);

    // Named indices keep the getAggregateElement overload unambiguous
    // (a literal 0 could also convert to a null Constant pointer).
    const unsigned lengthField = 0;
    const unsigned bytesField = 1;

    auto *length = llvm::dyn_cast<llvm::ConstantInt>(structValue->getAggregateElement(lengthField));
    ASSERT_NE(length, nullptr);
    ASSERT_EQ(length->getZExtValue(), 2u);

    auto *array = llvm::dyn_cast<llvm::ConstantDataArray>(structValue->getAggregateElement(bytesField));
    ASSERT_NE(array, nullptr);

    // Every dyn_cast result is checked before it is dereferenced so that a
    // wrong constant kind fails the test instead of crashing the binary.
    auto *byte1 = llvm::dyn_cast<llvm::ConstantInt>(array->getElementAsConstant(0));
    ASSERT_NE(byte1, nullptr);
    ASSERT_EQ(byte1->getZExtValue(), 195u);

    auto *byte2 = llvm::dyn_cast<llvm::ConstantInt>(array->getElementAsConstant(1));
    ASSERT_NE(byte2, nullptr);
    ASSERT_EQ(byte2->getZExtValue(), 169u);
}

View File

@ -1,9 +1,3 @@
#include "Compiler.hpp"
#include "Node.hpp"
#include "llvm/IR/Constants.h"
#include <gtest/gtest.h>
#include "Compiler.hpp"
#include "Node.hpp"
#include "llvm/IR/Constants.h"
@ -38,6 +32,11 @@ TEST(IntegerTest, LiteralOne)
testIntegerLiteral("1;", 1);
}
// Runs the shared testIntegerLiteral helper on the parenthesised source
// "(1);" with the expected value 1.
TEST(NestedIntegerTest, LiteralOne)
{
testIntegerLiteral("(1);", 1);
}
TEST(IntegerTest, LiteralNumberPositive)
{
testIntegerLiteral("67890;", 67890);