initial commit
@@ -1,6 +0,0 @@
-version: 2.0
-shards:
- tree_sitter:
- git: https://github.com/crystal-lang-tools/crystal-tree-sitter.git
- version: 0.1.0+git.commit.1d46ca231a641b30b8e7fbbae7eba050f7717a9f
-
@@ -1,17 +1,13 @@
-name: gitfoss_code_indexer
+name: gitfoss-code-indexer
version: 0.1.0
authors:
- - Willi Wonka <willi.wonka38@proton.me>
+ - your-name-here <your-email-here>
targets:
- code_indexer:
- main: src/code_indexer.cr
+ gitfoss-code-indexer:
+ main: src/gitfoss-code-indexer.cr
-crystal: ">= 1.19.1"
+crystal: '>= 1.19.1'
license: MIT
-
-dependencies:
- tree_sitter:
- github: crystal-lang-tools/crystal-tree-sitter
@@ -1,71 +0,0 @@
-require "tree_sitter"
-
-require "./extract_symbols.cr"
-
-class GitFOSS::Code::Indexer
- VERSION = "0.1.0"
-
- LANGUAGE = "c"
-
- @parser = TreeSitter::Parser.new
- @files = [] of String
- @symbols = {} of String => Array(CodeSymbol)
-
- def initialize
- TreeSitter::Repository.preload_all
- end
-
- def files=(values : Array(String))
- @files = values
- end
-
- def get_files_symbols : Hash(String, Array(CodeSymbol))
- @files.each do |p|
- @symbols[p] = get_symbols(p)
- end
- @symbols
- end
-
- def get_symbols(
- file_path : String
- ) : Array(CodeSymbol)
- symbols = [] of CodeSymbol
-
- path = Path[file_path]
- File.open(path) do |f|
- file = RepositoryFile.new(
- name: path.basename,
- content: f.gets_to_end,
- )
- @parser = TreeSitter::Parser.new (LANGUAGE)
-
- io = IO::Memory.new(file.content)
- tree = @parser.parse(nil, io)
-
- symbols = extract_symbols(
- @parser,
- tree.root_node,
- file,
- )
-
- pp tree.root_node
- pp symbols
- end
-
- symbols
- end
-end
-
-code_indexer = GitFOSS::Code::Indexer.new
-
-# pp TreeSitter::Repository.language_names
-
-code_indexer.files = [
- # "/home/xana/Dev/gitfoss-code-indexer/src/extract_symbols.cr",
- # "/home/xana/Dev/gitfoss-code-indexer/shard.yml",
- # "/home/xana/Dev/gitfoss/app/islands/RepositoriesList.tsx",
- "/home/xana/Dev/crystal-os/src/kernel/archive/c/main.c"
-]
-
-symbols = code_indexer.get_files_symbols
-pp symbols
@@ -1,81 +1,8 @@
-require "tree_sitter"
-require "json"
+require "tree-sitter"
-# JSON-serializable types as classes with explicit constructors
-class CodeArg
- include JSON::Serializable
- property type : String
- property name : String
-
- def initialize(@type : String = "", @name : String = "")
- end
-end
-
-class CodeProperty
- include JSON::Serializable
- property name : String
- property type : String
-
- def initialize(@name : String = "", @type : String = "")
- end
-end
-
-class CodeConstant
- include JSON::Serializable
- property name : String
- property value : String
-
- def initialize(@name : String = "", @value : String = "")
- end
-end
-
-class CodeMethod
- include JSON::Serializable
- property name : String
- property args : Array(CodeArg) = [] of CodeArg
- property returnType : String
-
- def initialize(@name : String = "", @args : Array(CodeArg) = [] of CodeArg, @returnType : String = "")
- end
-end
-
-class CodeFunction
- include JSON::Serializable
- property type : String = "function"
- property name : String
- property args : Array(CodeArg) = [] of CodeArg
- property returnType : String
- property visibility : String = "public"
-
- def initialize(@type : String = "function", @name : String = "", @args : Array(CodeArg) = [] of CodeArg, @returnType : String = "", @visibility : String = "public")
- end
-end
-
-class CodeClass
- include JSON::Serializable
- property type : String = "class"
- property name : String
- property properties : Array(CodeProperty) = [] of CodeProperty
- property methods : Array(CodeMethod) = [] of CodeMethod
- property constants : Array(CodeConstant) = [] of CodeConstant
+alias CodeSymbol = Hash(String, String | Array(Hash(String, String))) # one extracted symbol record
- def initialize(@type : String = "class", @name : String = "", @properties : Array(CodeProperty) = [] of CodeProperty, @methods : Array(CodeMethod) = [] of CodeMethod, @constants : Array(CodeConstant) = [] of CodeConstant)
- end
-end
-
-class CodeTopConstant
- include JSON::Serializable
- property type : String = "constant"
- property name : String
- property value : String
-
- def initialize(@type : String = "constant", @name : String = "", @value : String = "")
- end
-end
-
-alias CodeSymbol = CodeFunction | CodeClass | CodeTopConstant
-
-# RepositoryFile for demonstration purposes
+# Define a RepositoryFile class for demonstration purposes
class RepositoryFile
property name : String
property content : String
@@ -84,131 +11,81 @@ class RepositoryFile
end
end
+# Returns the `content` string held by the given RepositoryFile, unchanged.
def get_file_content(file : RepositoryFile) : String
file.content
end
-# Helpers to iterate children (prefer named children)
-def each_child(node : TreeSitter::Node)
- count = node.named_child_count
- if count > 0
- i = 0
- while i < count
- yield node.named_child(i.to_i)
- i += 1
- end
- else
- count = node.child_count
- i = 0
- while i < count
- yield node.child(i)
- i += 1
- end
- end
-end
-
-# Find first child matching predicate
-def find_child(node : TreeSitter::Node, source : String, &block : (TreeSitter::Node -> Bool)) : TreeSitter::Node?
- found = nil : TreeSitter::Node?
- each_child(node) do |child|
- if block.call(child)
- found = child
- break
- end
- end
- found
-end
-
-def node_text_or_empty(node : TreeSitter::Node?, source : String) : String
- if node
- node.text(source)
- else
- ""
- end
-end
-
+# Define the extract_symbols method
def extract_symbols(parser : TreeSitter::Parser, node : TreeSitter::Node, file : RepositoryFile) : Array(CodeSymbol)
- source = get_file_content(file)
symbols = [] of CodeSymbol
case node.type
when "function"
- args = [] of CodeArg
- each_child(node) do |child|
- if child.type == "parameter"
- param_type_node = find_child(child, source) { |n| n.type == "type" }
- param_name_node = find_child(child, source) { |n| n.type == "identifier" }
- args << CodeArg.new(type: node_text_or_empty(param_type_node, source), name: node_text_or_empty(param_name_node, source))
- end
+ args = [] of Hash(String, String)
+ node.children.select { |child| child.type == "parameter" }.each do |child|
+ args << {
+ "type" => child.children.find { |grandchild| grandchild.type == "type" }.inspect,
+ "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
+ }
end
-
- return_type_node = find_child(node, source) { |c| c.type == "type" }
- name_node = find_child(node, source) { |c| c.type == "identifier" }
-
- symbols << CodeFunction.new(
- name: node_text_or_empty(name_node, source),
- args: args,
- returnType: node_text_or_empty(return_type_node, source),
- visibility: "public"
- )
+ return_type = node.children.find { |child| child.type == "type" }.inspect
+ symbols << {
+ "type" => "function",
+ "name" => node.children.find { |child| child.type == "identifier" }.text,
+ "args" => args,
+ "returnType" => return_type,
+ "visibility" => "public",
+ }
when "class"
- properties = [] of CodeProperty
- methods = [] of CodeMethod
- constants = [] of CodeConstant
-
- each_child(node) do |child|
+ properties = [] of Hash(String, String)
+ methods = [] of CodeSymbol
+ constants = [] of Hash(String, String)
+ node.children.each do |child|
case child.type
when "property"
- prop_name_node = find_child(child, source) { |n| n.type == "identifier" }
- prop_type_node = find_child(child, source) { |n| n.type == "type" }
- properties << CodeProperty.new(
- name: node_text_or_empty(prop_name_node, source),
- type: node_text_or_empty(prop_type_node, source)
- )
+ properties << {
+ "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
+ "type" => child.children.find { |grandchild| grandchild.type == "type" }.inspect,
+ }
when "function"
- method_args = [] of CodeArg
- each_child(child) do |grandchild|
- if grandchild.type == "parameter"
- ma_type_node = find_child(grandchild, source) { |g| g.type == "type" }
- ma_name_node = find_child(grandchild, source) { |g| g.type == "identifier" }
- method_args << CodeArg.new(type: node_text_or_empty(ma_type_node, source), name: node_text_or_empty(ma_name_node, source))
- end
+ args = [] of Hash(String, String)
+ child.children.select { |grandchild| grandchild.type == "parameter" }.each do |grandchild|
+ args << {
+ "type" => grandchild.children.find { |great_grandchild| great_grandchild.type == "type" }.inspect,
+ "name" => grandchild.children.find { |great_grandchild| great_grandchild.type == "identifier" }.text,
+ }
end
- method_return_node = find_child(child, source) { |n| n.type == "type" }
- method_name_node = find_child(child, source) { |n| n.type == "identifier" }
- methods << CodeMethod.new(
- name: node_text_or_empty(method_name_node, source),
- args: method_args,
- returnType: node_text_or_empty(method_return_node, source)
- )
+ return_type = child.children.find { |grandchild| grandchild.type == "type" }.inspect
+ methods << {
+ "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
+ "args" => args,
+ "returnType" => return_type,
+ }
when "constant"
- const_name_node = find_child(child, source) { |n| n.type == "identifier" }
- const_value_node = find_child(child, source) { |n| n.type == "type" }
- constants << CodeConstant.new(
- name: node_text_or_empty(const_name_node, source),
- value: node_text_or_empty(const_value_node, source)
- )
+ constants << {
+ "name" => child.children.find { |grandchild| grandchild.type == "identifier" }.text,
+ "value" => child.children.find { |grandchild| grandchild.type == "type" }.inspect,
+ }
end
end
-
- class_name_node = find_child(node, source) { |c| c.type == "identifier" }
- symbols << CodeClass.new(
- name: node_text_or_empty(class_name_node, source),
- properties: properties,
- methods: methods,
- constants: constants
- )
+ symbols << {
+ "type" => "class",
+ "name" => node.children.find { |child| child.type == "identifier" }.text,
+ "properties" => properties,
+ "methods" => methods,
+ "constants" => constants,
+ }
when "constant"
- name_node = find_child(node, source) { |c| c.type == "identifier" }
- value_node = find_child(node, source) { |c| c.type == "type" }
- symbols << CodeTopConstant.new(
- name: node_text_or_empty(name_node, source),
- value: node_text_or_empty(value_node, source)
- )
+ symbols << {
+ "type" => "constant",
+ "name" => node.children.find { |child| child.type == "identifier" }.text,
+ "value" => node.children.find { |child| child.type == "type" }.inspect,
+ }
end
- # Recurse into children
- each_child(node) do |child|
+ # Recursively extract symbols from child nodes
+ node.children.each do |child|
symbols.concat(extract_symbols(parser, child, file))
end
@@ -0,0 +1,60 @@
+require "tree-sitter"
+require "./extract_symbols.cr"
+
+# `CodeSymbol` is declared in ./extract_symbols.cr (required above), so it is
+# not declared a second time here.
+
+# Indexes source files into per-file lists of `CodeSymbol` records.
+#
+# This is a module: `initialize` and `get_symbols` are instance methods that
+# only become callable on a type that includes it. State is kept in class
+# variables.
+module GitFOSS::Code::Indexer
+  VERSION = "0.1.0"
+
+  @@parser = TreeSitter::Parser.new
+  @@files = [] of String
+  # Maps each indexed path to the symbols extracted from that file.
+  @@symbols = {} of String => Array(CodeSymbol)
+
+  # Indexes a fixed list of sample files and pretty-prints the result.
+  def initialize
+    @@files = [
+      "~/tmp/file.js",
+      "~/tmp/file.cr",
+      "~/tmp/file.cpp",
+      "~/tmp/file.c",
+    ]
+
+    @@files.each do |file_path|
+      @@symbols[file_path] = get_symbols(file_path)
+    end
+
+    pp @@symbols
+  end
+
+  # Reads *file_path*, parses it and returns the symbols found by
+  # `extract_symbols`. A leading `~` is expanded to the home directory.
+  # Errors raised while opening or reading the file are not rescued.
+  def get_symbols(
+    file_path : String
+  ) : Array(CodeSymbol)
+    # File.open does not expand `~` itself, so resolve it here.
+    path = Path[file_path].expand(home: true)
+
+    File.open(path) do |f|
+      file = RepositoryFile.new(
+        name: path.basename,
+        content: f.gets_to_end,
+      )
+      # NOTE(review): no language is ever set on the parser in this file —
+      # confirm the tree-sitter shard can parse without one.
+      tree = @@parser.parse(file.content)
+      extract_symbols(
+        @@parser,
+        tree.root_node,
+        file,
+      )
+    end
+  end
+end