feat: make it build@@ -0,0 +1,6 @@
+version: 2.0
+shards:
+ tree_sitter:
+ git: https://github.com/crystal-lang-tools/crystal-tree-sitter.git
+ version: 0.1.0+git.commit.1d46ca231a641b30b8e7fbbae7eba050f7717a9f
+
@@ -1,13 +1,17 @@
-name: gitfoss-code-indexer
+name: gitfoss_code_indexer
version: 0.1.0
authors:
- - your-name-here <your-email-here>
+ - Willi Wonka <willi.wonka38@proton.me>
targets:
- gitfoss-code-indexer:
- main: src/gitfoss-code-indexer.cr
+ code_indexer:
+ main: src/code_indexer.cr
-crystal: '>= 1.19.1'
+crystal: ">= 1.19.1"
license: MIT
+
+dependencies:
+ tree_sitter:
+ github: crystal-lang-tools/crystal-tree-sitter
@@ -0,0 +1,57 @@
+require "tree_sitter"
+
+require "./extract_symbols.cr"
+
+# Indexes a list of source files: each file is parsed with tree-sitter and the
+# symbols found by `extract_symbols` are collected per file path.
+class GitFOSS::Code::Indexer
+  VERSION = "0.1.0"
+
+  # A single parser, configured for the Crystal grammar, is created once and
+  # reused for every file instead of being rebuilt on each `get_symbols` call.
+  @parser = TreeSitter::Parser.new("crystal")
+  @files = [] of String
+  @symbols = {} of String => Array(CodeSymbol)
+
+  def initialize
+  end
+
+  # Replaces the list of file paths that `get_files_symbols` will index.
+  def files=(values : Array(String))
+    @files = values
+  end
+
+  # Indexes every configured file and returns the accumulated symbols, keyed
+  # by the path exactly as it was given to `files=`.
+  def get_files_symbols : Hash(String, Array(CodeSymbol))
+    @files.each do |p|
+      @symbols[p] = get_symbols(p)
+    end
+    @symbols
+  end
+
+  # Reads `file_path`, parses its content and returns the symbols extracted
+  # from the resulting syntax tree. Errors raised while opening or reading the
+  # file are not rescued here and propagate to the caller.
+  def get_symbols(
+    file_path : String
+  ) : Array(CodeSymbol)
+    path = Path[file_path]
+    # File.open returns the value of its block, i.e. the extracted symbols.
+    File.open(path) do |f|
+      file = RepositoryFile.new(
+        name: path.basename,
+        content: f.gets_to_end,
+      )
+      tree = @parser.parse(nil, file.content)
+      extract_symbols(
+        @parser,
+        tree.root_node,
+        file,
+      )
+    end
+  end
+end
+
+# Index the files named on the command line and print the result. The paths
+# were previously hard-coded to one developer's home directory (so the program
+# raised on any other machine) and the computed symbols were discarded.
+code_indexer = GitFOSS::Code::Indexer.new
+code_indexer.files = ARGV
+pp code_indexer.get_files_symbols
@@ -1,8 +1,81 @@
-require "tree-sitter"
+require "tree_sitter"
+require "json"
-type CodeSymbol = Hash(String, String | Array(Hash(String, String)))
+# JSON-serializable types as classes with explicit constructors
+# One parameter of a function or method: its declared type and its name,
+# both held as strings that default to "".
+class CodeArg
+ include JSON::Serializable
+ property type : String
+ property name : String
+
+ def initialize(@type : String = "", @name : String = "")
+ end
+end
+
+# One property of a class: its name and its declared type, both held as
+# strings that default to "".
+class CodeProperty
+ include JSON::Serializable
+ property name : String
+ property type : String
+
+ def initialize(@name : String = "", @type : String = "")
+ end
+end
+
+# A constant that belongs to a class (stored in CodeClass#constants): its
+# name and its value, both held as strings that default to "".
+class CodeConstant
+ include JSON::Serializable
+ property name : String
+ property value : String
+
+ def initialize(@name : String = "", @value : String = "")
+ end
+end
+
+# A method that belongs to a class (stored in CodeClass#methods): its name,
+# its arguments and its return type. `returnType` keeps the camelCase spelling
+# of the "returnType" key used by the hash-based representation it replaces.
+class CodeMethod
+ include JSON::Serializable
+ property name : String
+ property args : Array(CodeArg) = [] of CodeArg
+ property returnType : String
+
+ def initialize(@name : String = "", @args : Array(CodeArg) = [] of CodeArg, @returnType : String = "")
+ end
+end
+
+# A function reported as a symbol in its own right. `type` is the fixed tag
+# "function"; `visibility` defaults to "public"; name and return type default
+# to "" and the argument list to empty.
+class CodeFunction
+ include JSON::Serializable
+ property type : String = "function"
+ property name : String
+ property args : Array(CodeArg) = [] of CodeArg
+ property returnType : String
+ property visibility : String = "public"
+
+ def initialize(@type : String = "function", @name : String = "", @args : Array(CodeArg) = [] of CodeArg, @returnType : String = "", @visibility : String = "public")
+ end
+end
+
+# A class reported as a symbol. `type` is the fixed tag "class"; the class
+# name defaults to "" and the lists of properties, methods and constants each
+# default to empty.
+class CodeClass
+ include JSON::Serializable
+ property type : String = "class"
+ property name : String
+ property properties : Array(CodeProperty) = [] of CodeProperty
+ property methods : Array(CodeMethod) = [] of CodeMethod
+ property constants : Array(CodeConstant) = [] of CodeConstant
+
+ def initialize(@type : String = "class", @name : String = "", @properties : Array(CodeProperty) = [] of CodeProperty, @methods : Array(CodeMethod) = [] of CodeMethod, @constants : Array(CodeConstant) = [] of CodeConstant)
+ end
+end
-# Define a RepositoryFile class for demonstration purposes
+# A constant reported as a symbol in its own right (it is a member of the
+# CodeSymbol union, unlike CodeConstant which only appears inside a
+# CodeClass). `type` is the fixed tag "constant"; name and value default to "".
+class CodeTopConstant
+ include JSON::Serializable
+ property type : String = "constant"
+ property name : String
+ property value : String
+
+ def initialize(@type : String = "constant", @name : String = "", @value : String = "")
+ end
+end
+
+alias CodeSymbol = CodeFunction | CodeClass | CodeTopConstant
+
+# RepositoryFile for demonstration purposes
class RepositoryFile
property name : String
property content : String
@@ -11,83 +84,152 @@ class RepositoryFile
end
end
-# Define a function to get the file content
def get_file_content(file : RepositoryFile) : String
file.content
end
-# Define the extract_symbols method
+# Yields the children of `node` one by one: its named children when it has
+# any, otherwise the children reported by `child_count` / `child`.
+def each_child(node : TreeSitter::Node)
+  named_total = node.named_child_count
+  if named_total > 0
+    index = 0
+    until index >= named_total
+      yield node.named_child(index)
+      index += 1
+    end
+    return
+  end
+
+  # No named children: fall back to the full child list.
+  total = node.child_count
+  index = 0
+  until index >= total
+    yield node.child(index)
+    index += 1
+  end
+end
+
+# Returns the first child of `node` (in `each_child` order) for which the
+# block returns true, or nil when no child matches.
+#
+# `source` is accepted so existing call sites keep working; it is not used.
+def find_child(node : TreeSitter::Node, source : String, &block : (TreeSitter::Node -> Bool)) : TreeSitter::Node?
+  each_child(node) do |child|
+    # Returning from inside the yielded block exits find_child itself, which
+    # avoids the nilable accumulator (`found = nil : T?` is not valid Crystal).
+    return child if block.call(child)
+  end
+  nil
+end
+
+# Returns the text that `node` covers in `source`, or an empty string when
+# `node` is nil.
+def node_text_or_empty(node : TreeSitter::Node?, source : String) : String
+  return "" unless node
+
+  node.text(source)
+end
+
+# Walks the syntax tree rooted at `node` and returns one CodeSymbol for every
+# node whose type is "function", "class" or "constant", searching recursively
+# through all children yielded by `each_child`.
+#
+# parser - only forwarded to the recursive calls; not otherwise used here.
+# node   - root of the subtree to inspect.
+# file   - its content is the source text handed to `node_text_or_empty` to
+#          obtain names, types and values.
+#
+# NOTE(review): a "function" or "constant" child of a "class" node is recorded
+# inside that class's CodeClass and, because the final loop also recurses into
+# the class's children, is reported a second time as a standalone
+# CodeFunction / CodeTopConstant - confirm this duplication is intended.
def extract_symbols(parser : TreeSitter::Parser, node : TreeSitter::Node, file : RepositoryFile) : Array(CodeSymbol)
+ source = get_file_content(file)
symbols = [] of CodeSymbol
case node.type
when "function"
- args = [] of Hash(String, String)
- node.children.select { |child| child.type == "parameter" }.each do |child|
- args << {
- "type" => child.children.find { |grandchild| grandchild.type == "type" }.inspect,
- "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
- }
+ args = [] of CodeArg
+ each_child(node) do |child|
+ if child.type == "parameter"
+ param_type_node = find_child(child, source) { |n| n.type == "type" }
+ param_name_node = find_child(child, source) { |n| n.type == "identifier" }
+ args << CodeArg.new(type: node_text_or_empty(param_type_node, source), name: node_text_or_empty(param_name_node, source))
+ end
end
- return_type = node.children.find { |child| child.type == "type" }.inspect
- symbols << {
- "type" => "function",
- "name" => node.children.find { |child| child.type == "identifier" }.text,
- "args" => args,
- "returnType" => return_type,
- "visibility" => "public",
- }
+
+ # The first direct "type" child is used as the return type and the first
+ # direct "identifier" child as the name; a missing child becomes "".
+ return_type_node = find_child(node, source) { |c| c.type == "type" }
+ name_node = find_child(node, source) { |c| c.type == "identifier" }
+
+ symbols << CodeFunction.new(
+ name: node_text_or_empty(name_node, source),
+ args: args,
+ returnType: node_text_or_empty(return_type_node, source),
+ visibility: "public"
+ )
when "class"
- properties = [] of Hash(String, String)
- methods = [] of CodeSymbol
- constants = [] of Hash(String, String)
- node.children.each do |child|
+ properties = [] of CodeProperty
+ methods = [] of CodeMethod
+ constants = [] of CodeConstant
+
+ # Members are collected from the class node's direct children only.
+ each_child(node) do |child|
case child.type
when "property"
- properties << {
- "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
- "type" => child.children.find { |grandchild| grandchild.type == "type" }.inspect,
- }
+ prop_name_node = find_child(child, source) { |n| n.type == "identifier" }
+ prop_type_node = find_child(child, source) { |n| n.type == "type" }
+ properties << CodeProperty.new(
+ name: node_text_or_empty(prop_name_node, source),
+ type: node_text_or_empty(prop_type_node, source)
+ )
when "function"
- args = [] of Hash(String, String)
- child.children.select { |grandchild| grandchild.type == "parameter" }.each do |grandchild|
- args << {
- "type" => grandchild.children.find { |great_grandchild| great_grandchild.type == "type" }.inspect,
- "name" => grandchild.children.find { |great_grandchild| great_grandchild.type == "identifier" }.text,
- }
+ method_args = [] of CodeArg
+ each_child(child) do |grandchild|
+ if grandchild.type == "parameter"
+ ma_type_node = find_child(grandchild, source) { |g| g.type == "type" }
+ ma_name_node = find_child(grandchild, source) { |g| g.type == "identifier" }
+ method_args << CodeArg.new(type: node_text_or_empty(ma_type_node, source), name: node_text_or_empty(ma_name_node, source))
+ end
end
- return_type = child.children.find { |grandchild| grandchild.type == "type" }.inspect
- methods << {
- "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
- "args" => args,
- "returnType" => return_type,
- }
+ method_return_node = find_child(child, source) { |n| n.type == "type" }
+ method_name_node = find_child(child, source) { |n| n.type == "identifier" }
+ methods << CodeMethod.new(
+ name: node_text_or_empty(method_name_node, source),
+ args: method_args,
+ returnType: node_text_or_empty(method_return_node, source)
+ )
when "constant"
- constants << {
- "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
- "value" => child.children.find { |grandchild| grandchild.type == "type" }.inspect,
- }
+ const_name_node = find_child(child, source) { |n| n.type == "identifier" }
+ const_value_node = find_child(child, source) { |n| n.type == "type" }
+ constants << CodeConstant.new(
+ name: node_text_or_empty(const_name_node, source),
+ value: node_text_or_empty(const_value_node, source)
+ )
end
end
- symbols << {
- "type" => "class",
- "name" => node.children.find { |child| child.type == "identifier" }.text,
- "properties" => properties,
- "methods" => methods,
- "constants" => constants,
- }
+
+ class_name_node = find_child(node, source) { |c| c.type == "identifier" }
+ symbols << CodeClass.new(
+ name: node_text_or_empty(class_name_node, source),
+ properties: properties,
+ methods: methods,
+ constants: constants
+ )
when "constant"
- symbols << {
- "type" => "constant",
- "name" => node.children.find { |child| child.type == "identifier" }.text,
- "value" => node.children.find { |child| child.type == "type" }.inspect,
- }
+ # The constant's value is read from its first "type" child.
+ name_node = find_child(node, source) { |c| c.type == "identifier" }
+ value_node = find_child(node, source) { |c| c.type == "type" }
+ symbols << CodeTopConstant.new(
+ name: node_text_or_empty(name_node, source),
+ value: node_text_or_empty(value_node, source)
+ )
end
- # Recursively extract symbols from child nodes
- node.children.each do |child|
+ # Recurse into every child, whatever this node's type, and append the results
+ each_child(node) do |child|
symbols.concat(extract_symbols(parser, child, file))
end
symbols
end
+
+# Example indexer that uses extract_symbols.
+class CodeIndexer
+  # Symbols keyed by file path; one path can hold several symbols.
+  property symbols : Hash(String, Array(CodeSymbol)) = {} of String => Array(CodeSymbol)
+
+  # The property default above already provides the empty hash, so nothing
+  # needs to be assigned here.
+  def initialize
+  end
+
+  # Reads the file at `path`, parses it and returns the symbols extracted from
+  # its syntax tree. The result is only returned; it is not stored in `symbols`.
+  # Errors raised by File.read are not rescued and propagate to the caller.
+  def get_symbols(path : String) : Array(CodeSymbol)
+    file = RepositoryFile.new(name: path, content: File.read(path))
+    # Same parser set-up and parse call as GitFOSS::Code::Indexer#get_symbols:
+    # a parser for the "crystal" grammar and a parse with no previous tree.
+    parser = TreeSitter::Parser.new("crystal")
+    root = parser.parse(nil, file.content).root_node
+    extract_symbols(parser, root, file)
+  end
+end
@@ -1,49 +0,0 @@
-require "tree-sitter"
-require "./extract_symbols.cr"
-
-type CodeSymbol = Hash(String, String | Array(Hash(String, String)))
-
-module GitFOSS::Code::Indexer
- VERSION = "0.1.0"
-
- @@parser = TreeSitter::Parser.new
- @@files = [] of String
- @@symbols = {} of String => CodeSymbol
-
- def initialize
- @@files = [
- "~/tmp/file.js",
- "~/tmp/file.cr",
- "~/tmp/file.cpp",
- "~/tmp/file.c",
- ]
-
- @@files.each do |p|
- @@symbols[p] = get_symbols(p)
- end
-
- pp @@symbols
- end
-
- def get_symbols(
- file_path : String
- ) : Array(CodeSymbol)
- symbols = [] of CodeSymbol
-
- path = Path[file_path]
- symbols = File.open(path) do |f|
- file = RepositoryFile.new(
- name: path.name,
- content: file.gets_to_end,
- )
- tree = @@parser.parse(file.content)
- extract_symbols(
- @@parser,
- tree.root_node,
- file,
- )
- end
-
- symbols
- end
-end
@@ -1,49 +0,0 @@
-require "tree-sitter"
-require "./extract_symbols.cr"
-
-type CodeSymbol = Hash(String, String | Array(Hash(String, String)))
-
-module GitFOSS::Code::Indexer
- VERSION = "0.1.0"
-
- @@parser = TreeSitter::Parser.new
- @@files = [] of String
- @@symbols = {} of String => CodeSymbol
-
- def initialize
- @@files = [
- "~/tmp/file.js",
- "~/tmp/file.cr",
- "~/tmp/file.cpp",
- "~/tmp/file.c",
- ]
-
- @@files.each do |p|
- @@symbols[p] = get_symbols(p)
- end
-
- pp @@symbols
- end
-
- def get_symbols(
- file_path : String
- ) : Array(CodeSymbol)
- symbols = [] of CodeSymbol
-
- path = Path[file_path]
- symbols = File.open(path) do |f|
- file = RepositoryFile.new(
- name: path.name,
- content: file.gets_to_end,
- )
- tree = @@parser.parse(file.content)
- extract_symbols(
- @@parser,
- tree.root_node,
- file,
- )
- end
-
- symbols
- end
-end