# Source: https://git.spwbk.site/swatson/git-site-gen/raw/master/lib/Gsg/Gather.pm
package Gsg::Gather;
use strict;
use warnings;
use Log::Log4perl qw(:easy);
use Shellex::Shellex qw(shellex findBin);
use Exporter qw(import);
our @EXPORT_OK = qw(get_file_tree get_projects trim_project_paths get_diff_stat);

# Run `git diff --stat` between two commits of a repository and return
# the result.
#
# Arguments:
#   $project_dir    - path passed to git's --git-dir option
#   $newest_commit  - first commit id given to `git diff`
#   $compare_commit - second commit id given to `git diff`
#   $logger         - logger object, forwarded to findBin and shellex
#
# Returns whatever shellex returns for the command (presumably the
# command's output, i.e. the diff-stat text -- confirm against
# Shellex::Shellex).
sub get_diff_stat($$$$) {

    my ( $project_dir, $newest_commit, $compare_commit, $logger ) = @_;

    # Example of the command being built:
    # git --git-dir="/home/git/git-site-gen.git/" diff --stat 37f54811d49d41a4d794594e5bbaaee2271d82ad 1afd193eda9a6bc703011a72afa273e560355713
    my $gitCmd = findBin("git",$logger);

    # The "diff --stat" subcommand has to be spelled out; without it git
    # would try to interpret the first commit id as a subcommand name.
    # The git dir is quoted the same way get_file_tree quotes it.
    my $diff_stat = shellex("$gitCmd --git-dir=\"$project_dir\" diff --stat $newest_commit $compare_commit",$logger);

    return $diff_stat;

}

# List the git project directories found directly under $git_dir.
#
# Arguments:
#   $git_dir              - directory whose immediate subdirectories are listed
#   $ignored_projects_ref - array ref of directory strings to leave out; an
#                           entry must equal the listed directory string
#                           exactly (as printed by `ls -d $git_dir/*/`)
#   $logger               - logger object; used for info() here and forwarded
#                           to findBin and shellex
#
# Returns an array ref of the directories whose name contains ".git" and
# that are not in the ignore list, in the order `ls` printed them.
sub get_projects($$$) {

    my ( $git_dir, $ignored_projects_ref, $logger ) = @_;

    my $ls_cmd = findBin("ls",$logger);
    my @git_project_dirs;
    foreach my $dir ( split("\n", shellex("$ls_cmd -d $git_dir/*/",$logger)) ) {
        # Only directories that look like git repositories are of interest
        next if $dir !~ m/\.git/;

        # \Q...\E makes the directory match literally. Unescaped, the "."
        # in ".git" matches any character and names such as "c++.git"
        # are not even valid patterns.
        if ( grep( /^\Q$dir\E$/, @$ignored_projects_ref ) ) {
            $logger->info("Found $dir in ignore list, skipping...");
            next;
        }

        push(@git_project_dirs,$dir);
    }

    return \@git_project_dirs;

}

# Reduce each project path to the part needed under the web root,
# e.g. /some/path/project.git/ -> project.git/
#
# Arguments:
#   $projects_ref - array ref of project paths
#   $logger       - logger object; info() is called once before returning
#
# Returns an array ref with the trimmed form of every path that matched
# the pattern, in input order. Paths that do not match are left out.
sub trim_project_paths($$) {

    my ( $projects_ref, $logger ) = @_;

    my @kept;
    for my $full_path ( @$projects_ref ) {
        # Keep only the last path component that contains a dot (plus a
        # trailing slash, if any); skip anything that has no such part.
        next unless $full_path =~ m/\/?([^\/]+\.[^\.]+$)/;
        push @kept, $1;
    }

    $logger->info("Returning trimmed project paths");
    return \@kept;

}

# Collect the files, file contents and commits of a git repository.
#
# Arguments:
#   $projectDir - path passed to git's --git-dir option
#   $logger     - logger object, forwarded to findBin and shellex
#
# Returns a four element list:
#   \%file_tree    - file name => object id, from `git ls-tree -r HEAD`
#   \%file_content - file name => output of `git show <object id>`, or the
#                    string "Binary file" when file(1) does not report a
#                    text type for it
#   \%commits      - commit id => output of `git show <commit id>`
#   \@commit_ids   - commit ids in the order `git log` printed them
sub get_file_tree($$) {

    my ( $projectDir, $logger ) = @_;

    # Resolve the external tools once instead of once per file
    my $gitCmd = findBin("git",$logger);
    my $file_cmd = findBin("file",$logger);

    # Get files
    my %file_tree;
    foreach my $file ( split("\n", shellex("$gitCmd --git-dir=\"$projectDir\" ls-tree --full-tree -r HEAD",$logger)) ) {
        # Skip anything that is not an "<object id>\t<name>" line, so a
        # stray line cannot add an entry built from unset captures.
        next unless $file =~ /([a-z0-9]{40})\t(.*)$/;
        # Name - object id
        $file_tree{$2} = $1;
    }

    # Scratch file for the binary check below. File::Temp picks a unique
    # name (no clashes between concurrent runs, nothing predictable in
    # /tmp) and removes the file when $scratch goes out of scope.
    require File::Temp;
    my $scratch = File::Temp->new();
    my $test_write_path = $scratch->filename;

    # Get file content
    my %file_content;
    foreach my $filename ( keys %file_tree ) {
        my $content = shellex("$gitCmd --git-dir=\"$projectDir\" show $file_tree{$filename}",$logger);
        # - TODO -
        # A hack -- interested in a better way to detect if git files are binary
        # Also increases run time, since every object is shown twice.
        # -b keeps the (random) scratch file name out of file's output so
        # it cannot influence the "text" check.
        my $bin_test = shellex("$gitCmd --git-dir=\"$projectDir\" show $file_tree{$filename} > \"$test_write_path\" && $file_cmd -b -i \"$test_write_path\"",$logger);

        if ( $bin_test !~ m/text/ ) {
            $content = "Binary file";
        }

        chomp $content;
        # Name - file content
        $file_content{$filename} = $content;
    }

    # Get logs
    my @commit_ids;
    foreach my $log_line ( split("\n",shellex("$gitCmd --git-dir=\"$projectDir\" log",$logger)) ) {
        # Anchored at line start: commit messages are indented in the log
        # output, so text like "This reverts commit <id>." is not
        # mistaken for a commit header.
        if ( $log_line =~ m/^commit\ ([a-z0-9]{40})/ ) {
            push(@commit_ids,$1);
        }
    }

    my %commits;
    foreach my $commit_id ( @commit_ids ) {
        my $commit_info = shellex("$gitCmd --git-dir=\"$projectDir\" show $commit_id",$logger);
        chomp $commit_info;
        $commits{$commit_id} = $commit_info;
    }

    # We return commit_ids as well to preserve ordering
    return ( \%file_tree, \%file_content, \%commits, \@commit_ids );

}

1;