diff --git a/core/src/main/resources/application.conf b/core/src/main/resources/application.conf index 412b6c3..da1656a 100644 --- a/core/src/main/resources/application.conf +++ b/core/src/main/resources/application.conf @@ -19,27 +19,35 @@ db { languagesConfig { haskell { + repository = "hackage" repoIndexUrl = "http://hackage.haskell.org/packages/index.tar.gz" + packageUrl = "https://hackage.haskell.org/package/%1$s-%2$s/%1$s-%2$s.tar.gz" repoArchivePath = "./data/meta/haskell/index.tar.gz" repoPath = "./data/meta/haskell/" concurrentTasksCount = 30 } rust { + repository = "crates" repoIndexUrl = "https://github.com/rust-lang/crates.io-index/archive/master.zip" + packageUrl = "https://crates.io/api/v1/crates/%s/%s/download" repoArchivePath = "./data/meta/rust/archive.zip" repoPath = "./data/meta/rust/" concurrentTasksCount = 30 + ignoreFiles = ["test-max-version-example-crate", "version-length-checking-is-overrated", "config.json", "archive.zip", ".git"] } ruby { + repository = "gem" repoIndexUrl = "http://rubygems.org/latest_specs.4.8.gz" + packageUrl = "https://rubygems.org/downloads/%s-%s.gem" repoArchivePath = "./data/meta/ruby/ruby_index.gz" repoJsonPath = "./data/meta/ruby/ruby_index.json" scriptPath = "./scripts/update_index.rb" concurrentTasksCount = 30 } javascript { + repository = "npm" repoIndexUrl = "https://replicate.npmjs.com/_all_docs?include_docs=true" - repoJsonPath = "./data/meta/npm/npm_packages_index.json" + packageUrl = "https://registry.npmjs.org/%1$s/-/%1$s-%2$s.tgz" concurrentTasksCount = 30 } } diff --git a/core/src/main/resources/db.migration/V1555715509__create_schema.sql b/core/src/main/resources/db.migration/V1555715509__create_schema.sql new file mode 100644 index 0000000..e69de29 diff --git a/core/src/main/scala/codesearch/core/Main.scala b/core/src/main/scala/codesearch/core/Main.scala index 576103c..6012ff2 100644 --- a/core/src/main/scala/codesearch/core/Main.scala +++ b/core/src/main/scala/codesearch/core/Main.scala @@ -22,33 
+22,39 @@ object Main extends IOApp { lang: String = "all" ) - case class LangRep[A <: DefaultTable]( + case class LangRep[A, F[_]]( db: DefaultDB[A], langIndex: LanguageIndex[A], - metaDownloader: MetaDownloader[IO] + metaDownloader: MetaDownloader[F] ) - def run(args: List[String]): IO[ExitCode] = + def run(args: List[String]): IO[ExitCode] = { Resource.make(IO(AsyncHttpClientFs2Backend[IO]()))(client => IO(client.close())).use { implicit httpClient => for { - params <- CLI.params(args) config <- Config.load[IO] - - unarchiver = Unarchiver[IO] - implicit0(downloader: Downloader[IO]) = Downloader.create[IO] - - hackageMeta <- HackageMetaDownloader(config.languagesConfig.haskell, unarchiver, downloader) - cratesMeta <- CratesMetaDownloader(config.languagesConfig.rust, unarchiver, downloader) - gemMeta <- GemMetaDownloader(config.languagesConfig.ruby, downloader) - npmMeta <- NpmMetaDownloader(config.languagesConfig.javascript, downloader) - - langReps = Map( - "haskell" -> LangRep[HackageTable](HackageDB, HaskellIndex(config), hackageMeta), - "rust" -> LangRep[CratesTable](CratesDB, RustIndex(config), cratesMeta), - "ruby" -> LangRep[GemTable](GemDB, RubyIndex(config), gemMeta), - "javascript" -> LangRep[NpmTable](NpmDB, JavaScriptIndex(config), npmMeta) - ) - exitCode <- Program(langReps) >>= (_.run(params)) + _ <- FlywayMigration.migrate[IO](config.db) + exitCode <- Transactor.create[IO](config.db).use { xa => + for { + params <- CLI.params(args) + + unarchiver = Unarchiver[IO] + implicit0(downloader: Downloader[IO]) = Downloader.create[IO] + + hackageMeta <- HackageMetaDownloader(config.languagesConfig.haskell, unarchiver, downloader, xa) + cratesMeta <- CratesMetaDownloader(config.languagesConfig.rust, unarchiver, downloader) + gemMeta <- GemMetaDownloader(config.languagesConfig.ruby, downloader) + npmMeta <- NpmMetaDownloader(config.languagesConfig.javascript, downloader) + + langReps = Map( + "haskell" -> LangRep[HackageTable, IO](HackageDB, 
HaskellIndex(config), hackageMeta), + "rust" -> LangRep[CratesTable, IO](CratesDB, RustIndex(config), cratesMeta), + "ruby" -> LangRep[GemTable, IO](GemDB, RubyIndex(config), gemMeta), + "javascript" -> LangRep[NpmTable, IO](NpmDB, JavaScriptIndex(config), npmMeta) + ) + exitCode <- Program(langReps) >>= (_.run(params)) + } yield exitCode + } } yield exitCode } + } } diff --git a/core/src/main/scala/codesearch/core/Program.scala b/core/src/main/scala/codesearch/core/Program.scala index 514191a..8f92338 100644 --- a/core/src/main/scala/codesearch/core/Program.scala +++ b/core/src/main/scala/codesearch/core/Program.scala @@ -3,16 +3,24 @@ package codesearch.core import cats.effect._ import cats.instances.list._ import cats.syntax.applicative._ +import cats.syntax.flatMap._ import cats.syntax.foldable._ import cats.syntax.traverse._ +import cats.syntax.functor._ import codesearch.core.Main.{LangRep, Params} import codesearch.core.model.DefaultTable import io.chrisdavenport.log4cats.Logger +import codesearch.core.util.manatki.syntax.raise._ import io.chrisdavenport.log4cats.slf4j.Slf4jLogger -class Program(langReps: Map[String, LangRep[_ <: DefaultTable]], logger: Logger[IO]) { +case class InvalidLang(lang: String) extends RuntimeException(s"Unsupported language $lang") - def run(params: Params): IO[ExitCode] = +class Program[F[_]: Sync: ContextShift]( + langReps: Map[String, LangRep[_ <: DefaultTable]], + logger: Logger[F] +) { + + def run(params: Params): F[ExitCode] = for { _ <- if (params.lang == "all") { logger.info("Codesearch-core started for all supported languages") @@ -20,47 +28,38 @@ class Program(langReps: Map[String, LangRep[_ <: DefaultTable]], logger: Logger[ logger.info(s"Codesearch-core started for language ${params.lang}") } - _ <- initDb(params).whenA(params.initDB) _ <- downloadMeta(params).whenA(params.downloadMeta) _ <- updatePackages(params).whenA(params.updatePackages) _ <- buildIndex(params).whenA(params.buildIndex) } yield ExitCode.Success - 
object InvalidLang extends RuntimeException(s"Unsupported language") - - def findRepositories(lang: String): IO[List[LangRep[_]]] = { + def findRepositories(lang: String): F[List[LangRep[_]]] = { if (lang == "all") { - IO.pure(langReps.values.toList) + langReps.values.toList.pure[F].widen } else { langReps.get(lang) match { - case Some(l) => IO.pure(List(l)) - case None => IO.raiseError(InvalidLang) + case Some(l) => List(l).pure[F].widen + case None => InvalidLang(lang).raise } } } - def initDb(params: Params): IO[Unit] = - for { - languages <- findRepositories(params.lang) - _ <- languages.traverse_(_.db.initDB) - } yield () - - def downloadMeta(params: Params): IO[Unit] = { + def downloadMeta(params: Params): F[Unit] = { for { languages <- findRepositories(params.lang) - _ <- languages.traverse_(_.metaDownloader.downloadMeta) + _ <- languages.traverse_(_.metaDownloader.download) } yield () } - def updatePackages(params: Params): IO[Unit] = + def updatePackages(params: Params): F[Unit] = for { languages <- findRepositories(params.lang) updated <- languages.traverse(_.langIndex.updatePackages(params.limitedCountPackages)) _ <- logger.info(s"Updated: ${updated.sum}") } yield () - def buildIndex(params: Params): IO[Unit] = + def buildIndex(params: Params): F[Unit] = for { languages <- findRepositories(params.lang) _ <- languages.traverse_(_.langIndex.buildIndex) @@ -69,6 +68,7 @@ class Program(langReps: Map[String, LangRep[_ <: DefaultTable]], logger: Logger[ } object Program { - def apply(langReps: Map[String, LangRep[_ <: DefaultTable]]): IO[Program] = - Slf4jLogger.fromClass[IO](getClass).map(logger => new Program(langReps, logger)) + def apply[F[_]: Sync]( + langReps: Map[String, LangRep[_ <: DefaultTable]] + ): F[Program[F]] = Slf4jLogger.fromClass[F](getClass).map(logger => new Program(langReps, logger)) } diff --git a/core/src/main/scala/codesearch/core/config/CindexConfig.scala b/core/src/main/scala/codesearch/core/config/CindexConfig.scala new file mode 
100644 index 0000000..c6e5459 --- /dev/null +++ b/core/src/main/scala/codesearch/core/config/CindexConfig.scala @@ -0,0 +1,7 @@ +package codesearch.core.config + +case class CindexConfig( + indexDir: String, + tempIndexDir: String, + packagesToIndexFile: String +) diff --git a/core/src/main/scala/codesearch/core/config/Config.scala b/core/src/main/scala/codesearch/core/config/Config.scala index 245b96d..5473778 100644 --- a/core/src/main/scala/codesearch/core/config/Config.scala +++ b/core/src/main/scala/codesearch/core/config/Config.scala @@ -7,6 +7,15 @@ import cats.effect.Sync import pureconfig.module.catseffect._ import pureconfig.{CamelCase, ConfigFieldMapping, ProductHint} +trait RepositoryConfig { + def repository: String + def repoIndexUrl: URI +} + +trait ArchivedIndexConfig extends RepositoryConfig { + def repoArchivePath: Path +} + case class Config( db: DatabaseConfig, snippetConfig: SnippetConfig, @@ -19,7 +28,13 @@ case class DatabaseConfig( port: Int, name: String, user: String, - password: String + password: String, + properties: DatabaseProperties +) + +case class DatabaseProperties( + driver: String, + url: String ) case class SnippetConfig( @@ -36,31 +51,57 @@ case class LanguagesConfig( ) case class HaskellConfig( + repository: String, repoIndexUrl: URI, repoArchivePath: Path, repoPath: Path, - concurrentTasksCount: Int -) + downloaderConfig: PackageDownloaderConfig +) extends ArchivedIndexConfig case class RubyConfig( + repository: String, repoIndexUrl: URI, repoArchivePath: Path, repoJsonPath: Path, scriptPath: Path, - concurrentTasksCount: Int -) + downloaderConfig: PackageDownloaderConfig +) extends ArchivedIndexConfig case class RustConfig( + repository: String, repoIndexUrl: URI, repoArchivePath: Path, repoPath: Path, - concurrentTasksCount: Int -) + ignoreFiles: Set[String], + downloaderConfig: PackageDownloaderConfig +) extends ArchivedIndexConfig case class JavaScriptConfig( + repository: String, repoIndexUrl: URI, - repoJsonPath: Path, 
- concurrentTasksCount: Int + downloaderConfig: PackageDownloaderConfig +) extends RepositoryConfig + +case class SourcesUpdaterConfig() + +case class PackageDownloaderConfig( + packageUrl: String, + packageArchivePath: String, + packageSourcesPath: String, + filterConfig: SourcesFilterConfig, +) + +case class SourcesFilterConfig( + allowedFileNames: Set[String] +) + +case class SourcesExtraConfig( + testDirs: Set[String], +) + +case class RateLimiterConfig( + numberTasks: Int, + per: Int ) case class MetricsConfig( diff --git a/core/src/main/scala/codesearch/core/config/SourcesFilesConfig.scala b/core/src/main/scala/codesearch/core/config/SourcesFilesConfig.scala new file mode 100644 index 0000000..d1a5077 --- /dev/null +++ b/core/src/main/scala/codesearch/core/config/SourcesFilesConfig.scala @@ -0,0 +1,12 @@ +package codesearch.core.config + +case class SourcesFilesConfig( + testDirsNames: Set[String], + allowedFileNames: Set[String], + filesExtensions: FilesExtensionsConfig +) + +case class FilesExtensionsConfig( + commonExtensions: Set[String], + sourceExtensions: Set[String], +) { def extensions: Set[String] = commonExtensions ++ sourceExtensions } diff --git a/core/src/main/scala/codesearch/core/db/DefaultDB.scala b/core/src/main/scala/codesearch/core/db/DefaultDB.scala index fa1adc3..25ac1be 100644 --- a/core/src/main/scala/codesearch/core/db/DefaultDB.scala +++ b/core/src/main/scala/codesearch/core/db/DefaultDB.scala @@ -70,9 +70,9 @@ trait DefaultDB[T <: DefaultTable] { } def initDB: IO[Unit] = - IO.fromFuture(IO(db.run(MTable.getTables))).flatMap { vector => + IO.fromFuture(IO(db.run(MTable.getTables))).flatMap { tables => IO( - if (!vector.exists(_.name.name == table.baseTableRow.tableName)) + if (!tables.exists(_.name.name == table.baseTableRow.tableName)) db.run(table.schema.create) ) } diff --git a/core/src/main/scala/codesearch/core/db/FlywayMigration.scala b/core/src/main/scala/codesearch/core/db/FlywayMigration.scala new file mode 100644 index 
0000000..09a552e --- /dev/null +++ b/core/src/main/scala/codesearch/core/db/FlywayMigration.scala @@ -0,0 +1,19 @@ +package codesearch.core.db + +import cats.effect.Sync +import codesearch.core.config.DatabaseConfig +import org.flywaydb.core.Flyway + +object FlywayMigration { + def migrate[F[_]: Sync](config: DatabaseConfig): F[Unit] = Sync[F].delay { + Flyway + .configure() + .dataSource( + config.properties.url, + config.user, + config.password + ) + .load() + .migrate() + } +} diff --git a/core/src/main/scala/codesearch/core/db/Transactor.scala b/core/src/main/scala/codesearch/core/db/Transactor.scala new file mode 100644 index 0000000..ec7cce5 --- /dev/null +++ b/core/src/main/scala/codesearch/core/db/Transactor.scala @@ -0,0 +1,24 @@ +package codesearch.core.db + +import cats.effect.{Async, ContextShift, Resource} +import codesearch.core.config.DatabaseConfig +import doobie.hikari.HikariTransactor +import doobie.util.ExecutionContexts + +object Transactor { + def create[F[_]: Async: ContextShift](config: DatabaseConfig): Resource[F, HikariTransactor[F]] = { + import config._ + for { + connectEC <- ExecutionContexts.fixedThreadPool[F](32) + transactionEC <- ExecutionContexts.cachedThreadPool[F] + xa <- HikariTransactor.newHikariTransactor( + properties.driver, + properties.url, + user, + password, + connectEC, + transactionEC + ) + } yield xa + } +} diff --git a/core/src/main/scala/codesearch/core/db/repository/PackageDbRepository.scala b/core/src/main/scala/codesearch/core/db/repository/PackageDbRepository.scala new file mode 100644 index 0000000..04d1da7 --- /dev/null +++ b/core/src/main/scala/codesearch/core/db/repository/PackageDbRepository.scala @@ -0,0 +1,44 @@ +package codesearch.core.db.repository + +import cats.Monad +import doobie.Transactor +import doobie.implicits._ +import fs2.Stream + +final case class Package( + name: String, + version: String +) + +final case class PackageTableRow( + name: String, + version: String, + repository: String +) + 
+trait PackageDbRepository[F[_]] { + def upsert(name: String, version: String, repository: String): F[Int] + def findByRepository(repository: String): Stream[F, Package] +} + +object PackageDbRepository { + def apply[F[_]: Monad](xa: Transactor[F]): PackageDbRepository[F] = new PackageDbRepository[F] { + def upsert(name: String, version: String, repository: String): F[Int] = { + sql""" + INSERT INTO package(name, version, repository, updated_at) + VALUES ($name, $version, $repository, now()) + ON CONFLICT (name, repository) DO UPDATE + SET version = excluded.version, + updated_at = excluded.updated_at + """.update.run.transact(xa) + } + + def findByRepository(repository: String): Stream[F, Package] = { + sql""" + SELECT name, version + FROM package + WHERE repository = $repository + """.query[Package].stream.transact(xa) + } + } +} diff --git a/core/src/main/scala/codesearch/core/db/repository/PackageIndexDbRepository.scala b/core/src/main/scala/codesearch/core/db/repository/PackageIndexDbRepository.scala new file mode 100644 index 0000000..62d0071 --- /dev/null +++ b/core/src/main/scala/codesearch/core/db/repository/PackageIndexDbRepository.scala @@ -0,0 +1,57 @@ +package codesearch.core.db.repository + +import cats.Monad +import cats.implicits._ +import doobie._ +import doobie.implicits._ +import fs2.Stream + +final case class PackageIndexTableRow( + name: String, + version: String, + repository: String +) + +final case class PackageIndex( + name: String, + version: String +) + +trait PackageIndexRepository[F[_]] { + def batchUpsert(packages: List[PackageIndexTableRow]): F[Int] + def batchUpsert(stream: Stream[F, PackageIndexTableRow]): F[Int] + def findLatestByRepository(repository: String): Stream[F, PackageIndexTableRow] +} + +object PackageIndexDbRepository { + def apply[F[_]: Monad](xa: Transactor[F]): PackageIndexRepository[F] = new PackageIndexRepository[F] { + + def batchUpsert(packages: List[PackageIndexTableRow]): F[Int] = { + 
Update[PackageIndexTableRow]( + """ + |INSERT INTO repository_index(name, version, repository) + |VALUES (?, ?, ?) + |ON CONFLICT (name, repository) DO UPDATE + | SET version = excluded.version + """.stripMargin + ).updateMany(packages).transact(xa) + } + + def batchUpsert(stream: Stream[F, PackageIndexTableRow]): F[Int] = { + stream + .chunkN(10000) + .evalMap(packages => batchUpsert(packages.toList)) + .compile + .foldMonoid + } + + def findLatestByRepository(repository: String): Stream[F, PackageIndexTableRow] = { + sql""" + SELECT r.name, r.version, r.repository + FROM repository_index r + LEFT JOIN package p ON r.name = p.name AND p.repository = r.repository + WHERE r.repository = $repository AND (p.name IS NULL OR p.version <> r.version) + """.query[PackageIndexTableRow].stream.transact(xa) + } + } +} diff --git a/core/src/main/scala/codesearch/core/index/JavaScriptIndex.scala b/core/src/main/scala/codesearch/core/index/JavaScriptIndex.scala deleted file mode 100644 index 0b429d0..0000000 --- a/core/src/main/scala/codesearch/core/index/JavaScriptIndex.scala +++ /dev/null @@ -1,42 +0,0 @@ -package codesearch.core.index - -import java.nio.file.Path - -import cats.effect.{ContextShift, IO} -import cats.syntax.flatMap._ -import codesearch.core.config.{Config, JavaScriptConfig} -import codesearch.core.db.NpmDB -import codesearch.core.index.details.NpmDetails -import codesearch.core.index.repository.{NpmPackage, SourcesDownloader} -import codesearch.core.index.directory.Directory._ -import codesearch.core.index.directory.Directory.ops._ -import codesearch.core.index.directory.СindexDirectory -import codesearch.core.index.directory.СindexDirectory.JavaScriptCindex -import codesearch.core.model.NpmTable -import fs2.Stream - -class JavaScriptIndex(config: JavaScriptConfig)( - implicit val shift: ContextShift[IO], - sourcesDownloader: SourcesDownloader[IO, NpmPackage] -) extends LanguageIndex[NpmTable] with NpmDB { - - override protected val cindexDir: СindexDirectory = JavaScriptCindex - - override protected def 
concurrentTasksCount: Int = config.concurrentTasksCount - - override protected def updateSources(name: String, version: String): IO[Int] = { - logger.info(s"downloading package $name") >> archiveDownloadAndExtract(NpmPackage(name, version)) - } - - override protected def getLastVersions: Stream[IO, (String, String)] = NpmDetails(config).detailsMap - - override protected def buildFsUrl(packageName: String, version: String): Path = - NpmPackage(packageName, version).packageDir -} - -object JavaScriptIndex { - def apply(config: Config)( - implicit shift: ContextShift[IO], - sourcesDownloader: SourcesDownloader[IO, NpmPackage] - ) = new JavaScriptIndex(config.languagesConfig.javascript) -} diff --git a/core/src/main/scala/codesearch/core/index/LanguageIndex.scala b/core/src/main/scala/codesearch/core/index/LanguageIndex.scala index d3d5d33..d4a73cd 100644 --- a/core/src/main/scala/codesearch/core/index/LanguageIndex.scala +++ b/core/src/main/scala/codesearch/core/index/LanguageIndex.scala @@ -6,10 +6,14 @@ import java.nio.file.{Files, Path => NioPath} import cats.effect.{ContextShift, IO} import cats.instances.int._ +import cats.instances.vector._ import cats.syntax.flatMap._ +import cats.syntax.foldable._ import cats.syntax.functor._ +import cats.syntax.monadError._ import codesearch.core.BlockingEC import codesearch.core.db.DefaultDB +import codesearch.core.index.directory.{Directory, СSearchDirectory} import codesearch.core.index.directory.{Directory, СindexDirectory} import codesearch.core.index.repository._ import codesearch.core.model.DefaultTable @@ -29,6 +33,7 @@ trait LanguageIndex[A <: DefaultTable] { protected val logger: SelfAwareStructuredLogger[IO] = Slf4jLogger.unsafeCreate[IO] + protected def csearchDir: СSearchDirectory protected def cindexDir: СindexDirectory protected def concurrentTasksCount: Int @@ -45,31 +50,45 @@ trait LanguageIndex[A <: DefaultTable] { } } + def dropTempIndexFile = IO(Files.deleteIfExists(csearchDir.tempIndexDirAs[NioPath])) def 
dropTempIndexFile = IO(Files.deleteIfExists(cindexDir.tempIndexDirAs[NioPath])) def createCSearchDir = IO( - if (Files.notExists(СindexDirectory.root)) - Files.createDirectories(СindexDirectory.root) + if (Files.notExists(СSearchDirectory.root)) + Files.createDirectories(СSearchDirectory.root) + if (Files.notExists(СindexDirectory.root)) + Files.createDirectories(СindexDirectory.root) ) def indexPackages(packageDirs: Seq[NioPath]): IO[Unit] = { + def cindex(packages: Seq[NioPath]) = { + val args = "cindex" +: packages.map(_.toString) + val env = Seq("CSEARCHINDEX" -> csearchDir.tempIndexDirAs[String]) + IO { Process(args, None, env: _*) ! } + .ensureOr(BadExitCode)(_ == 0) + } + + val batchSize = 10000 + packageDirs.grouped(batchSize).toVector.traverse_(cindex) val args = Seq("cindex", cindexDir.dirsToIndex[String]) val env = Seq("CSEARCHINDEX" -> cindexDir.tempIndexDirAs[String]) for { _ <- Stream - .emits(packageDirs) - .covary[IO] - .map(_.toString + "\n") - .through(utf8Encode) - .to(file.writeAll(cindexDir.dirsToIndex[NioPath], BlockingEC, List(CREATE, TRUNCATE_EXISTING))) - .compile - .drain + .emits(packageDirs) + .covary[IO] + .map(_.toString + "\n") + .through(utf8Encode) + .to(file.writeAll(cindexDir.dirsToIndex[NioPath], BlockingEC, List(CREATE, TRUNCATE_EXISTING))) + .compile + .drain _ <- IO(Process(args, None, env: _*) !) 
} yield () } def replaceIndexFile = IO( Files.move( + csearchDir.tempIndexDirAs[NioPath], + csearchDir.indexDirAs[NioPath], cindexDir.tempIndexDirAs[NioPath], cindexDir.indexDirAs[NioPath], REPLACE_EXISTING @@ -96,11 +115,11 @@ trait LanguageIndex[A <: DefaultTable] { } logger.debug("UPDATE PACKAGES") >> limit - .map(packages.take(_)) - .getOrElse(packages) - .mapAsyncUnordered(concurrentTasksCount)(updateSources _ tupled) - .compile - .foldMonoid + .map(packages.take(_)) + .getOrElse(packages) + .mapAsyncUnordered(concurrentTasksCount)(updateSources _ tupled) + .compile + .foldMonoid } /** @@ -142,3 +161,4 @@ trait LanguageIndex[A <: DefaultTable] { } case class BadExitCode(code: Int) extends Exception(s"Process returned a bad exit code: $code") + diff --git a/core/src/main/scala/codesearch/core/index/RubyIndex.scala b/core/src/main/scala/codesearch/core/index/RubyIndex.scala deleted file mode 100644 index 2f3c8c1..0000000 --- a/core/src/main/scala/codesearch/core/index/RubyIndex.scala +++ /dev/null @@ -1,50 +0,0 @@ -package codesearch.core.index - -import java.nio.file.Path - -import cats.effect.{ContextShift, IO} -import cats.syntax.flatMap._ -import codesearch.core._ -import codesearch.core.config.{Config, RubyConfig} -import codesearch.core.db.GemDB -import codesearch.core.index.directory.Directory._ -import codesearch.core.index.directory.Directory.ops._ -import codesearch.core.index.directory.СindexDirectory -import codesearch.core.index.directory.СindexDirectory.RubyCindex -import codesearch.core.index.repository.{GemPackage, SourcesDownloader} -import codesearch.core.model.GemTable -import io.circe.fs2._ -import fs2.Stream -import fs2.io.file - -class RubyIndex(rubyConfig: RubyConfig)( - implicit val shift: ContextShift[IO], - sourcesDownloader: SourcesDownloader[IO, GemPackage] -) extends LanguageIndex[GemTable] with GemDB { - - override protected val cindexDir: СindexDirectory = RubyCindex - - override protected def concurrentTasksCount: Int = 
rubyConfig.concurrentTasksCount - - override protected def updateSources(name: String, version: String): IO[Int] = { - logger.info(s"downloading package $name") >> archiveDownloadAndExtract(GemPackage(name, version)) - } - - override protected def getLastVersions: Stream[IO, (String, String)] = { - file - .readAll[IO](rubyConfig.repoJsonPath, BlockingEC, 4096) - .through(byteArrayParser[IO]) - .through(decoder[IO, Seq[String]]) - .collect { case Seq(name, version, _) => name -> version } - } - - override protected def buildFsUrl(packageName: String, version: String): Path = - GemPackage(packageName, version).packageDir -} - -object RubyIndex { - def apply(config: Config)( - implicit shift: ContextShift[IO], - sourcesDownloader: SourcesDownloader[IO, GemPackage] - ) = new RubyIndex(config.languagesConfig.ruby) -} diff --git a/core/src/main/scala/codesearch/core/index/RustIndex.scala b/core/src/main/scala/codesearch/core/index/RustIndex.scala deleted file mode 100644 index e6ec163..0000000 --- a/core/src/main/scala/codesearch/core/index/RustIndex.scala +++ /dev/null @@ -1,65 +0,0 @@ -package codesearch.core.index - -import java.nio.file.Path - -import cats.effect.{ContextShift, IO} -import cats.syntax.flatMap._ -import codesearch.core.config.{Config, RustConfig} -import codesearch.core.db.CratesDB -import codesearch.core.index.directory.Directory._ -import codesearch.core.index.directory.Directory.ops._ -import codesearch.core.index.directory.СindexDirectory -import codesearch.core.index.directory.СindexDirectory.RustCindex -import codesearch.core.index.repository.{CratesPackage, SourcesDownloader} -import codesearch.core.model.CratesTable -import codesearch.core.util.Helper -import fs2.Stream -import io.circe.Decoder -import io.circe.fs2._ - -class RustIndex(rustConfig: RustConfig)( - implicit val shift: ContextShift[IO], - sourcesDownloader: SourcesDownloader[IO, CratesPackage] -) extends LanguageIndex[CratesTable] with CratesDB { - - private val IgnoreFiles = Set( 
- "test-max-version-example-crate", - "version-length-checking-is-overrated", - "config.json", - "archive.zip" - ) - - override protected val cindexDir: СindexDirectory = RustCindex - - override protected def concurrentTasksCount: Int = rustConfig.concurrentTasksCount - - override protected def updateSources(name: String, version: String): IO[Int] = { - logger.info(s"downloading package $name") >> archiveDownloadAndExtract(CratesPackage(name, version)) - } - - override protected def getLastVersions: Stream[IO, (String, String)] = { - implicit val packageDecoder: Decoder[(String, String)] = { c => - for { - name <- c.get[String]("name") - version <- c.get[String]("vers") - } yield name -> version - } - - Helper - .recursiveListFiles(rustConfig.repoPath.toFile) - .filter(file => !IgnoreFiles.contains(file.getName)) - .evalMap(file => Helper.readFileAsync(file.getAbsolutePath).map(_.last)) - .through(stringStreamParser) - .through(decoder[IO, (String, String)]) - } - - override protected def buildFsUrl(packageName: String, version: String): Path = - CratesPackage(packageName, version).packageDir -} - -object RustIndex { - def apply(config: Config)( - implicit shift: ContextShift[IO], - sourcesDownloader: SourcesDownloader[IO, CratesPackage] - ) = new RustIndex(config.languagesConfig.rust) -} diff --git a/core/src/main/scala/codesearch/core/index/details/NpmDetails.scala b/core/src/main/scala/codesearch/core/index/details/NpmDetails.scala deleted file mode 100644 index 7fa59e3..0000000 --- a/core/src/main/scala/codesearch/core/index/details/NpmDetails.scala +++ /dev/null @@ -1,39 +0,0 @@ -package codesearch.core.index.details - -import cats.effect.{ContextShift, IO} -import codesearch.core._ -import codesearch.core.config.JavaScriptConfig -import fs2.Stream -import fs2.io._ -import io.circe.fs2._ -import io.circe.generic.auto._ -import io.circe.{Decoder, HCursor} - -import scala.language.higherKinds - -private final case class NpmRegistryPackage(name: String, version: 
String) -private final case class NpmPackage(name: String, version: String) - -private[index] final class NpmDetails(config: JavaScriptConfig)(implicit shift: ContextShift[IO]) { - - private implicit val docDecoder: Decoder[NpmRegistryPackage] = (c: HCursor) => { - val doc = c.downField("doc") - for { - name <- doc.get[String]("name") - distTag = doc.downField("dist-tags") - tag <- distTag.get[String]("latest") - } yield NpmRegistryPackage(name, tag) - } - - def detailsMap: Stream[IO, (String, String)] = { - file - .readAll[IO](config.repoJsonPath, BlockingEC, chunkSize = 4096) - .through(byteStreamParser[IO]) - .through(decoder[IO, NpmPackage]) - .map(npmPackage => npmPackage.name -> npmPackage.version) - } -} - -private[index] object NpmDetails { - def apply(config: JavaScriptConfig)(implicit shift: ContextShift[IO]) = new NpmDetails(config) -} diff --git a/core/src/main/scala/codesearch/core/index/directory/Extractor.scala b/core/src/main/scala/codesearch/core/index/directory/Extractor.scala index 0cb2eea..00b7ffe 100644 --- a/core/src/main/scala/codesearch/core/index/directory/Extractor.scala +++ b/core/src/main/scala/codesearch/core/index/directory/Extractor.scala @@ -1,6 +1,5 @@ package codesearch.core.index.directory -import java.io.File import java.nio.file.Path import cats.effect.Sync @@ -18,7 +17,7 @@ private[index] trait Extractor { * @param from is file to unarchiving * @param to is target directory */ - def unzipUsingMethod[F[_]](from: Path, to: Path)(implicit F: Sync[F]): F[Unit] = F.delay( + def unzipUsingMethod[F[_]: Sync](from: Path, to: Path): F[Unit] = Sync[F].delay( ArchiverFactory .createArchiver(TAR, GZIP) .extract(from.toFile, to.toFile) @@ -41,7 +40,7 @@ private[index] trait Extractor { * @param unarchived is directory contains unarchived files * @return same directory containing all files and directories from unarchived files */ - def flatDir[F[_]](unarchived: Path)(implicit F: Sync[F]): F[Path] = F.delay { + def flatDir[F[_]: 
Sync](unarchived: Path): F[Path] = Sync[F].delay { val dir = unarchived.toFile dir.listFiles .filter(_.isDirectory) diff --git a/core/src/main/scala/codesearch/core/index/indexer/HaskellIndexer.scala b/core/src/main/scala/codesearch/core/index/indexer/HaskellIndexer.scala new file mode 100644 index 0000000..fc332ba --- /dev/null +++ b/core/src/main/scala/codesearch/core/index/indexer/HaskellIndexer.scala @@ -0,0 +1,11 @@ +package codesearch.core.index.indexer + +import cats.effect.{ContextShift, Sync} +import codesearch.core.index.directory.СindexDirectory.HaskellCindex +import doobie.util.transactor.Transactor + +object HaskellIndexer { + def apply[F[_]: Sync: ContextShift]( + xa: Transactor[F] + ): SourcesIndexer[F] = new SourcesIndexer(HaskellCindex, "hackage", xa) +} diff --git a/core/src/main/scala/codesearch/core/index/indexer/Indexer.scala b/core/src/main/scala/codesearch/core/index/indexer/Indexer.scala new file mode 100644 index 0000000..a0516dd --- /dev/null +++ b/core/src/main/scala/codesearch/core/index/indexer/Indexer.scala @@ -0,0 +1,84 @@ +package codesearch.core.index.indexer + +import java.nio.file.StandardCopyOption.REPLACE_EXISTING +import java.nio.file.StandardOpenOption.{CREATE, TRUNCATE_EXISTING} +import java.nio.file.{Files, Path => NioPath} + +import cats.effect._ +import cats.syntax.flatMap._ +import cats.syntax.functor._ +import codesearch.core.BlockingEC +import codesearch.core.db.repository.{Package, PackageDbRepository} +import codesearch.core.index.directory.{Directory, СindexDirectory} +import codesearch.core.syntax.path._ +import doobie.util.transactor.Transactor +import fs2.io.file +import fs2.text.utf8Encode +import fs2.{Pipe, Stream} + +import scala.sys.process.Process + +private[indexer] trait Indexer[F[_]] { + def index: F[Unit] +} + +private[indexer] class SourcesIndexer[F[_]: Sync: ContextShift]( + indexDir: СindexDirectory, + repository: String, + xa: Transactor[F] +) extends Indexer[F] { + + def index: F[Unit] = { + for { + 
packageDirs <- latestPackagePaths + _ <- createCSearchDir + _ <- dropTempIndexFile + _ <- dirsToIndex(packageDirs) + _ <- indexPackages + _ <- replaceIndexFile + } yield () + } + + private def latestPackagePaths: F[Stream[F, NioPath]] = Sync[F].pure( + PackageDbRepository[F](xa) + .findByRepository(repository) + .through(buildFsPath) + ) + + private def buildFsPath: Pipe[F, Package, NioPath] = { input => + input.map(`package` => Directory.sourcesDir / repository / `package`.name / `package`.version) + } + + private def createCSearchDir: F[Option[NioPath]] = Sync[F].delay( + if (Files.notExists(СindexDirectory.root)) + Some(Files.createDirectories(СindexDirectory.root)) + else None + ) + + private def dropTempIndexFile: F[Boolean] = + Sync[F].delay(Files.deleteIfExists(indexDir.tempIndexDirAs[NioPath])) + + private def dirsToIndex(stream: Stream[F, NioPath]): F[Unit] = { + stream + .map(_.toString + "\n") + .through(utf8Encode) + .through(file.writeAll(indexDir.dirsToIndex[NioPath], BlockingEC, List(CREATE, TRUNCATE_EXISTING))) + .compile + .drain + } + + private def indexPackages: F[Unit] = { + val args = Seq("cindex", indexDir.dirsToIndex[String]) + val env = Seq("CSEARCHINDEX" -> indexDir.tempIndexDirAs[String]) + Sync[F].delay(Process(args, None, env: _*) !).void + } + + private def replaceIndexFile: F[NioPath] = + Sync[F].delay( + Files.move( + indexDir.tempIndexDirAs[NioPath], + indexDir.indexDirAs[NioPath], + REPLACE_EXISTING + ) + ) +} diff --git a/core/src/main/scala/codesearch/core/index/indexer/JavaScriptIndexer.scala b/core/src/main/scala/codesearch/core/index/indexer/JavaScriptIndexer.scala new file mode 100644 index 0000000..683f204 --- /dev/null +++ b/core/src/main/scala/codesearch/core/index/indexer/JavaScriptIndexer.scala @@ -0,0 +1,11 @@ +package codesearch.core.index.indexer + +import cats.effect.{ContextShift, Sync} +import codesearch.core.index.directory.СindexDirectory.JavaScriptCindex +import doobie.util.transactor.Transactor + +object 
JavaScriptIndexer { + def apply[F[_]: Sync: ContextShift]( + xa: Transactor[F] + ): SourcesIndexer[F] = new SourcesIndexer[F](JavaScriptCindex, "npm", xa) +} diff --git a/core/src/main/scala/codesearch/core/index/indexer/RubyIndexer.scala b/core/src/main/scala/codesearch/core/index/indexer/RubyIndexer.scala new file mode 100644 index 0000000..3f46991 --- /dev/null +++ b/core/src/main/scala/codesearch/core/index/indexer/RubyIndexer.scala @@ -0,0 +1,11 @@ +package codesearch.core.index.indexer + +import cats.effect.{ContextShift, Sync} +import codesearch.core.index.directory.СindexDirectory.RubyCindex +import doobie.util.transactor.Transactor + +object RubyIndexer { + def apply[F[_]: Sync: ContextShift]( + xa: Transactor[F] + ): SourcesIndexer[F] = new SourcesIndexer(RubyCindex, "gem", xa) +} diff --git a/core/src/main/scala/codesearch/core/index/indexer/RustIndexer.scala b/core/src/main/scala/codesearch/core/index/indexer/RustIndexer.scala new file mode 100644 index 0000000..b76b042 --- /dev/null +++ b/core/src/main/scala/codesearch/core/index/indexer/RustIndexer.scala @@ -0,0 +1,11 @@ +package codesearch.core.index.indexer + +import cats.effect.{ContextShift, Sync} +import codesearch.core.index.directory.СindexDirectory.RustCindex +import doobie.util.transactor.Transactor + +object RustIndexer { + def apply[F[_]: Sync: ContextShift]( + xa: Transactor[F] + ): SourcesIndexer[F] = new SourcesIndexer(RustCindex, "crates", xa) +} diff --git a/core/src/main/scala/codesearch/core/index/repository/Downloader.scala b/core/src/main/scala/codesearch/core/index/repository/Downloader.scala index 38c3bff..26f25f7 100644 --- a/core/src/main/scala/codesearch/core/index/repository/Downloader.scala +++ b/core/src/main/scala/codesearch/core/index/repository/Downloader.scala @@ -1,6 +1,5 @@ package codesearch.core.index.repository -import java.io.File import java.nio.ByteBuffer import java.nio.file.Path import java.nio.file.StandardOpenOption.{CREATE, TRUNCATE_EXISTING} @@ -29,7 
+28,10 @@ object Downloader { def apply[F[_]: Downloader]: Downloader[F] = implicitly - def create[F[_]: ContextShift](implicit http: SttpBackend[F, Stream[F, ByteBuffer]], F: Sync[F]): Downloader[F] = + def create[F[_]: ContextShift]( + implicit http: SttpBackend[F, Stream[F, ByteBuffer]], + F: Sync[F] + ): Downloader[F] = new Downloader[F] { /** diff --git a/core/src/main/scala/codesearch/core/meta/CratesMetaDownloader.scala b/core/src/main/scala/codesearch/core/meta/CratesMetaDownloader.scala deleted file mode 100644 index d4e0030..0000000 --- a/core/src/main/scala/codesearch/core/meta/CratesMetaDownloader.scala +++ /dev/null @@ -1,41 +0,0 @@ -package codesearch.core.meta - -import cats.Monad -import cats.effect.Sync -import cats.syntax.flatMap._ -import cats.syntax.functor._ -import codesearch.core.config.RustConfig -import codesearch.core.index.repository.Downloader -import codesearch.core.util.Unarchiver -import com.softwaremill.sttp.Uri -import io.chrisdavenport.log4cats.Logger -import io.chrisdavenport.log4cats.slf4j.Slf4jLogger -import org.rauschig.jarchivelib.ArchiveFormat.ZIP - -class CratesMetaDownloader[F[_]: Monad]( - config: RustConfig, - unarchiver: Unarchiver[F], - downloader: Downloader[F], - logger: Logger[F] -) extends MetaDownloader[F] { - - def downloadMeta: F[Unit] = { - for { - _ <- logger.info("Downloading rust meta information") - archive <- downloader.download(Uri(config.repoIndexUrl), config.repoArchivePath) - _ <- unarchiver.extract(archive, config.repoPath, ZIP) - _ <- logger.info("Downloading finished") - } yield () - } -} - -object CratesMetaDownloader { - def apply[F[_]: Sync]( - config: RustConfig, - unarchiver: Unarchiver[F], - downloader: Downloader[F] - ): F[MetaDownloader[F]] = - for { - logger <- Slf4jLogger.create - } yield new CratesMetaDownloader(config, unarchiver, downloader, logger) -} diff --git a/core/src/main/scala/codesearch/core/meta/GemMetaDownloader.scala 
b/core/src/main/scala/codesearch/core/meta/GemMetaDownloader.scala deleted file mode 100644 index 97ee37e..0000000 --- a/core/src/main/scala/codesearch/core/meta/GemMetaDownloader.scala +++ /dev/null @@ -1,33 +0,0 @@ -package codesearch.core.meta - -import cats.effect.Sync -import cats.syntax.flatMap._ -import cats.syntax.functor._ -import codesearch.core.config.RubyConfig -import codesearch.core.index.repository.Downloader -import com.softwaremill.sttp._ -import io.chrisdavenport.log4cats.Logger -import io.chrisdavenport.log4cats.slf4j.Slf4jLogger - -import scala.sys.process._ - -class GemMetaDownloader[F[_]: Sync](config: RubyConfig, downloader: Downloader[F], logger: Logger[F]) - extends MetaDownloader[F] { - - def downloadMeta: F[Unit] = - for { - _ <- logger.info("Downloading ruby meta information") - _ <- downloader.download(Uri(config.repoIndexUrl), config.repoArchivePath) - _ <- Sync[F].delay { - Seq("ruby", config.scriptPath.toString, config.repoArchivePath.toString, config.repoJsonPath.toString) !! 
- } - _ <- logger.info("Downloading finished") - } yield () -} - -object GemMetaDownloader { - def apply[F[_]: Sync](config: RubyConfig, downloader: Downloader[F]): F[MetaDownloader[F]] = - for { - logger <- Slf4jLogger.create - } yield new GemMetaDownloader(config, downloader, logger) -} diff --git a/core/src/main/scala/codesearch/core/meta/HackageMetaDownloader.scala b/core/src/main/scala/codesearch/core/meta/HackageMetaDownloader.scala deleted file mode 100644 index d114367..0000000 --- a/core/src/main/scala/codesearch/core/meta/HackageMetaDownloader.scala +++ /dev/null @@ -1,41 +0,0 @@ -package codesearch.core.meta - -import cats.Monad -import cats.effect.Sync -import cats.syntax.flatMap._ -import cats.syntax.functor._ -import codesearch.core.config.HaskellConfig -import codesearch.core.index.repository.Downloader -import codesearch.core.util.Unarchiver -import com.softwaremill.sttp._ -import io.chrisdavenport.log4cats.Logger -import io.chrisdavenport.log4cats.slf4j.Slf4jLogger -import org.rauschig.jarchivelib.ArchiveFormat.TAR -import org.rauschig.jarchivelib.CompressionType.GZIP - -class HackageMetaDownloader[F[_]: Monad]( - config: HaskellConfig, - unarchiver: Unarchiver[F], - downloader: Downloader[F], - logger: Logger[F] -) extends MetaDownloader[F] { - - def downloadMeta: F[Unit] = - for { - _ <- logger.info("Downloading haskell meta information") - _ <- downloader.download(Uri(config.repoIndexUrl), config.repoArchivePath) - _ <- unarchiver.extract(config.repoArchivePath, config.repoPath, TAR, GZIP) - _ <- logger.info("Downloading finished") - } yield () -} - -object HackageMetaDownloader { - def apply[F[_]: Sync]( - config: HaskellConfig, - unarchiver: Unarchiver[F], - downloader: Downloader[F] - ): F[MetaDownloader[F]] = - for { - logger <- Slf4jLogger.create - } yield new HackageMetaDownloader(config, unarchiver, downloader, logger) -} diff --git a/core/src/main/scala/codesearch/core/meta/MetaDownloader.scala 
b/core/src/main/scala/codesearch/core/meta/MetaDownloader.scala deleted file mode 100644 index 95b72a9..0000000 --- a/core/src/main/scala/codesearch/core/meta/MetaDownloader.scala +++ /dev/null @@ -1,10 +0,0 @@ -package codesearch.core.meta - -trait MetaDownloader[F[_]] { - - /** - * Download meta information about packages from remote repository - * e.g. for Haskell is list of versions and cabal file for each version - */ - def downloadMeta: F[Unit] -} diff --git a/core/src/main/scala/codesearch/core/meta/NpmMetaDownloader.scala b/core/src/main/scala/codesearch/core/meta/NpmMetaDownloader.scala deleted file mode 100644 index 5f81d64..0000000 --- a/core/src/main/scala/codesearch/core/meta/NpmMetaDownloader.scala +++ /dev/null @@ -1,111 +0,0 @@ -package codesearch.core.meta - -import java.nio.file.StandardOpenOption.{CREATE, TRUNCATE_EXISTING} - -import cats.effect.{ContextShift, Sync} -import cats.syntax.flatMap._ -import cats.syntax.functor._ -import codesearch.core.BlockingEC -import codesearch.core.config.JavaScriptConfig -import codesearch.core.index.repository.Downloader -import com.softwaremill.sttp._ -import fs2.io.file -import fs2.text._ -import fs2.{Pipe, Stream} -import fs2json._ -import io.chrisdavenport.log4cats.Logger -import io.chrisdavenport.log4cats.slf4j.Slf4jLogger -import io.circe._ -import io.circe.fs2._ -import io.circe.generic.auto._ -import io.circe.syntax._ - -class NpmMetaDownloader[F[_]: Sync: ContextShift]( - config: JavaScriptConfig, - downloader: Downloader[F], - logger: Logger[F] -) extends MetaDownloader[F] { - - def downloadMeta: F[Unit] = - for { - _ <- logger.info("Downloading javascript meta information") - _ <- Sync[F].delay(config.repoJsonPath.getParent.toFile.mkdirs()) - _ <- downloader - .download(Uri(config.repoIndexUrl)) - .through(tokenParser[F]) - .through(tokenFilter) - .through(prettyPrinter()) - .through(cutStream) - .through(byteArrayParser[F]) - .through(decoder[NpmRegistryPackage]) - .map(_.asJson.noSpaces + "\n") - 
.through(utf8Encode) - .to(file.writeAll(config.repoJsonPath, BlockingEC, List(CREATE, TRUNCATE_EXISTING))) - .compile - .drain - _ <- logger.info("Downloading finished") - } yield () - - def tokenFilter: Pipe[F, JsonToken, JsonToken] = - TokenFilter.downObject - .downField("rows") - .downArray - .downObject - .downField("doc") - .downObject - .removeFields( - Set( - "_id", - "_rev", - "versions", - "description", - "maintainers", - "homepage", - "keywords", - "readme", - "author", - "bugs", - "license", - "readmeFilename" - ) - ) - - def cutStream: Pipe[F, Byte, Byte] = { input => - var depth = 0 - input.filter { byte => - if (byte == '[') { - depth += 1; true - } else if (byte == ']') { - depth -= 1; true - } else depth > 0 - } - } - - def decoder[A](implicit decode: Decoder[A]): Pipe[F, Json, A] = - _.flatMap { json => - decode(json.hcursor) match { - case Left(_) => Stream.empty - case Right(a) => Stream.emit(a) - } - } -} - -final case class NpmRegistryPackage(name: String, version: String) - -object NpmRegistryPackage { - implicit val docDecoder: Decoder[NpmRegistryPackage] = { c => - val doc = c.downField("doc") - for { - name <- doc.get[String]("name") - distTag = doc.downField("dist-tags") - tag <- distTag.get[String]("latest") - } yield NpmRegistryPackage(name, tag) - } -} - -object NpmMetaDownloader { - def apply[F[_]: Sync: ContextShift](config: JavaScriptConfig, downloader: Downloader[F]): F[MetaDownloader[F]] = - for { - logger <- Slf4jLogger.create - } yield new NpmMetaDownloader(config, downloader, logger) -} diff --git a/core/src/main/scala/codesearch/core/meta/downloader/ArchivedIndexDownloader.scala b/core/src/main/scala/codesearch/core/meta/downloader/ArchivedIndexDownloader.scala new file mode 100644 index 0000000..d0e4a74 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/downloader/ArchivedIndexDownloader.scala @@ -0,0 +1,31 @@ +package codesearch.core.meta.downloader + +import cats.effect.{ContextShift, Sync} +import 
cats.syntax.flatMap._ +import cats.syntax.functor._ +import codesearch.core.config.ArchivedIndexConfig +import codesearch.core.db.repository.PackageIndexRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.meta.unarchiver.StreamIndexUnarchiver +import com.softwaremill.sttp.Uri +import io.chrisdavenport.log4cats.Logger +import org.apache.commons.io.FileUtils + +private[meta] class ArchivedIndexDownloader[F[_]: Sync: ContextShift]( + config: ArchivedIndexConfig, + downloader: Downloader[F], + unarchiver: StreamIndexUnarchiver[F], + indexRepository: PackageIndexRepository[F], + logger: Logger[F] +) extends RepositoryIndexDownloader[F] { + + def download: F[Unit] = + for { + _ <- logger.info(s"Downloading ${config.repository} meta information") + archive <- downloader.download(Uri(config.repoIndexUrl), config.repoArchivePath) + stream <- unarchiver.unarchiveToStream(archive) + _ <- indexRepository.batchUpsert(stream) + _ <- Sync[F].delay(FileUtils.cleanDirectory(config.repoArchivePath.getParent.toFile)) + _ <- logger.info("Downloading finished") + } yield () +} diff --git a/core/src/main/scala/codesearch/core/meta/downloader/ByteStreamIndexDownloader.scala b/core/src/main/scala/codesearch/core/meta/downloader/ByteStreamIndexDownloader.scala new file mode 100644 index 0000000..33dd388 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/downloader/ByteStreamIndexDownloader.scala @@ -0,0 +1,30 @@ +package codesearch.core.meta.downloader + +import cats.effect.{ContextShift, Sync} +import cats.syntax.applicative._ +import cats.syntax.flatMap._ +import cats.syntax.functor._ +import codesearch.core.config.RepositoryConfig +import codesearch.core.db.repository.PackageIndexRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.meta.parser.IndexByteStreamParser +import com.softwaremill.sttp.Uri +import io.chrisdavenport.log4cats.Logger + +private[meta] class ByteStreamIndexDownloader[F[_]: Sync: 
ContextShift]( + config: RepositoryConfig, + downloader: Downloader[F], + indexDbRepository: PackageIndexRepository[F], + indexParser: IndexByteStreamParser[F], + logger: Logger[F] +) extends RepositoryIndexDownloader[F] { + + def download: F[Unit] = + for { + _ <- logger.info(s"Downloading ${config.repository} meta information") + stream <- downloader.download(Uri(config.repoIndexUrl)).pure[F].widen + index <- indexParser.parse(stream) + _ <- indexDbRepository.batchUpsert(index) + _ <- logger.info("Downloading finished") + } yield () +} diff --git a/core/src/main/scala/codesearch/core/meta/downloader/CratesIndexDownloader.scala b/core/src/main/scala/codesearch/core/meta/downloader/CratesIndexDownloader.scala new file mode 100644 index 0000000..5b8778e --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/downloader/CratesIndexDownloader.scala @@ -0,0 +1,30 @@ +package codesearch.core.meta.downloader + +import cats.effect.{ContextShift, Sync} +import cats.syntax.functor._ +import codesearch.core.config.RustConfig +import codesearch.core.db.repository.PackageIndexDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.meta.unarchiver.RustIndexUnarchiver +import codesearch.core.util.Unarchiver +import doobie.util.transactor.Transactor +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object CratesIndexDownloader { + def apply[F[_]: Sync: ContextShift]( + config: RustConfig, + unarchiver: Unarchiver[F], + downloader: Downloader[F], + xa: Transactor[F] + ): F[RepositoryIndexDownloader[F]] = + for { + logger <- Slf4jLogger.create + } yield + new ArchivedIndexDownloader( + config, + downloader, + RustIndexUnarchiver(unarchiver, config), + PackageIndexDbRepository(xa), + logger + ) +} diff --git a/core/src/main/scala/codesearch/core/meta/downloader/GemIndexDownloader.scala b/core/src/main/scala/codesearch/core/meta/downloader/GemIndexDownloader.scala new file mode 100644 index 0000000..0a03a0f --- /dev/null +++ 
b/core/src/main/scala/codesearch/core/meta/downloader/GemIndexDownloader.scala @@ -0,0 +1,28 @@ +package codesearch.core.meta.downloader + +import cats.effect.{ContextShift, Sync} +import cats.syntax.functor._ +import codesearch.core.config.RubyConfig +import codesearch.core.db.repository.PackageIndexDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.meta.unarchiver.RubyIndexUnarchiver +import doobie.util.transactor.Transactor +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object GemIndexDownloader { + def apply[F[_]: Sync: ContextShift]( + config: RubyConfig, + downloader: Downloader[F], + xa: Transactor[F] + ): F[RepositoryIndexDownloader[F]] = + for { + logger <- Slf4jLogger.create + } yield + new ArchivedIndexDownloader( + config, + downloader, + RubyIndexUnarchiver(config), + PackageIndexDbRepository(xa), + logger + ) +} diff --git a/core/src/main/scala/codesearch/core/meta/downloader/HackageIndexDownloader.scala b/core/src/main/scala/codesearch/core/meta/downloader/HackageIndexDownloader.scala new file mode 100644 index 0000000..11be5e1 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/downloader/HackageIndexDownloader.scala @@ -0,0 +1,30 @@ +package codesearch.core.meta.downloader + +import cats.effect.{ConcurrentEffect, ContextShift} +import codesearch.core.config.HaskellConfig +import cats.syntax.functor._ +import codesearch.core.db.repository.PackageIndexDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.meta.unarchiver.HaskellIndexUnarchiver +import codesearch.core.util.Unarchiver +import doobie.util.transactor.Transactor +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object HackageIndexDownloader { + def apply[F[_]: ConcurrentEffect: ContextShift]( + config: HaskellConfig, + downloader: Downloader[F], + unarchiver: Unarchiver[F], + xa: Transactor[F] + ): F[RepositoryIndexDownloader[F]] = + for { + logger <- Slf4jLogger.create + } yield + new 
ArchivedIndexDownloader( + config, + downloader, + HaskellIndexUnarchiver(unarchiver, config), + PackageIndexDbRepository(xa), + logger + ) +} diff --git a/core/src/main/scala/codesearch/core/meta/downloader/NpmIndexDownloader.scala b/core/src/main/scala/codesearch/core/meta/downloader/NpmIndexDownloader.scala new file mode 100644 index 0000000..172fd13 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/downloader/NpmIndexDownloader.scala @@ -0,0 +1,29 @@ +package codesearch.core.meta.downloader + +import cats.effect.{ContextShift, Sync} +import cats.syntax.functor._ +import codesearch.core.config.JavaScriptConfig +import codesearch.core.db.repository.PackageIndexDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.meta.parser.JavaScriptIndexParser +import doobie.util.transactor.Transactor +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object NpmIndexDownloader { + def apply[F[_]: Sync: ContextShift]( + config: JavaScriptConfig, + downloader: Downloader[F], + xa: Transactor[F] + ): F[RepositoryIndexDownloader[F]] = { + for { + logger <- Slf4jLogger.create + } yield + new ByteStreamIndexDownloader( + config, + downloader, + PackageIndexDbRepository(xa), + JavaScriptIndexParser(config), + logger + ) + } +} diff --git a/core/src/main/scala/codesearch/core/meta/downloader/RepositoryIndexDownloader.scala b/core/src/main/scala/codesearch/core/meta/downloader/RepositoryIndexDownloader.scala new file mode 100644 index 0000000..9139832 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/downloader/RepositoryIndexDownloader.scala @@ -0,0 +1,10 @@ +package codesearch.core.meta.downloader + +private[meta] trait RepositoryIndexDownloader[F[_]] { + + /** + * Download meta information about packages from remote repository + * e.g. 
for Haskell is list of versions and cabal file for each version + */ + def download: F[Unit] +} diff --git a/core/src/main/scala/codesearch/core/meta/parser/IndexByteStreamParser.scala b/core/src/main/scala/codesearch/core/meta/parser/IndexByteStreamParser.scala new file mode 100644 index 0000000..7cb1966 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/parser/IndexByteStreamParser.scala @@ -0,0 +1,8 @@ +package codesearch.core.meta.parser + +import codesearch.core.db.repository.PackageIndexTableRow +import fs2.Stream + +trait IndexByteStreamParser[F[_]] { + def parse(stream: Stream[F, Byte]): F[Stream[F, PackageIndexTableRow]] +} diff --git a/core/src/main/scala/codesearch/core/meta/parser/JavaScriptIndexParser.scala b/core/src/main/scala/codesearch/core/meta/parser/JavaScriptIndexParser.scala new file mode 100644 index 0000000..223da08 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/parser/JavaScriptIndexParser.scala @@ -0,0 +1,81 @@ +package codesearch.core.meta.parser + +import cats.effect.Sync +import codesearch.core.config.JavaScriptConfig +import codesearch.core.db.repository.PackageIndexTableRow +import fs2.{Pipe, Stream} +import fs2json.{JsonToken, TokenFilter, prettyPrinter, tokenParser} +import io.circe.fs2.byteArrayParser +import io.circe.{Decoder, Json} + +final class JavaScriptIndexParser[F[_]: Sync](config: JavaScriptConfig) extends IndexByteStreamParser[F] { + + private implicit val docDecoder: Decoder[PackageIndexTableRow] = { cursor => + val doc = cursor.downField("doc") + for { + name <- doc.get[String]("name") + distTag = doc.downField("dist-tags") + tag <- distTag.get[String]("latest") + } yield PackageIndexTableRow(name, tag, config.repository) + } + + def parse(stream: Stream[F, Byte]): F[Stream[F, PackageIndexTableRow]] = { + Sync[F].pure( + stream + .through(tokenParser[F]) + .through(tokenFilter) + .through(prettyPrinter()) + .through(cutStream) + .through(byteArrayParser[F]) + .through(decoder)) + } + + private 
def tokenFilter: Pipe[F, JsonToken, JsonToken] = + TokenFilter.downObject + .downField("rows") + .downArray + .downObject + .downField("doc") + .downObject + .removeFields( + Set( + "_id", + "_rev", + "versions", + "description", + "maintainers", + "homepage", + "keywords", + "readme", + "author", + "bugs", + "license", + "readmeFilename" + ) + ) + + private def cutStream: Pipe[F, Byte, Byte] = { input => + var depth = 0 + input.filter { byte => + if (byte == '[') { + depth += 1; true + } else if (byte == ']') { + depth -= 1; true + } else depth > 0 + } + } + + private def decoder(implicit decode: Decoder[PackageIndexTableRow]): Pipe[F, Json, PackageIndexTableRow] = { input => + input.flatMap { json => + decode(json.hcursor) match { + case Left(_) => Stream.empty + case Right(a) => Stream.emit(a) + } + } + } +} + +object JavaScriptIndexParser { + def apply[F[_]: Sync](config: JavaScriptConfig): JavaScriptIndexParser[F] = + new JavaScriptIndexParser(config) +} diff --git a/core/src/main/scala/codesearch/core/meta/unarchiver/HaskellIndexUnarchiver.scala b/core/src/main/scala/codesearch/core/meta/unarchiver/HaskellIndexUnarchiver.scala new file mode 100644 index 0000000..df0ad85 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/unarchiver/HaskellIndexUnarchiver.scala @@ -0,0 +1,53 @@ +package codesearch.core.meta.unarchiver + +import java.nio.file.Path + +import cats.Order +import cats.effect.{ConcurrentEffect, ContextShift, Sync} +import cats.instances.list._ +import cats.syntax.foldable._ +import cats.syntax.functor._ +import codesearch.core.config.HaskellConfig +import codesearch.core.db.repository.PackageIndexTableRow +import codesearch.core.model.Version +import codesearch.core.util.Unarchiver +import fs2.{Chunk, Stream} +import org.rauschig.jarchivelib.ArchiveFormat.TAR +import org.rauschig.jarchivelib.CompressionType.GZIP + +private[meta] final class HaskellIndexUnarchiver[F[_]: Sync]( + unarchiver: Unarchiver[F], + config: HaskellConfig +) extends 
StreamIndexUnarchiver[F] { + + def unarchiveToStream(path: Path): F[Stream[F, PackageIndexTableRow]] = { + for { + _ <- unarchiver.extract(path, config.repoPath, TAR, GZIP) + } yield flatPackages + } + + private def flatPackages: F[Stream[F, PackageIndexTableRow]] = { + Sync[F].pure( + Stream + .evalUnChunk(Sync[F].delay(Chunk.array(config.repoPath.toFile.listFiles))) + .filter(_.isDirectory) + .evalMap { packageDir => + Sync[F].delay { + packageDir.listFiles.toList + .filter(_.isDirectory) + .map(_.getName) + .maximumOption(Order.fromLessThan(Version.less)) + .map(version => PackageIndexTableRow(packageDir.getName, version, config.repository)) + } + } + .unNone + ) + } +} + +object HaskellIndexUnarchiver { + def apply[F[_]: ConcurrentEffect: ContextShift]( + unarchiver: Unarchiver[F], + config: HaskellConfig + ): HaskellIndexUnarchiver[F] = new HaskellIndexUnarchiver(unarchiver, config) +} diff --git a/core/src/main/scala/codesearch/core/meta/unarchiver/RubyIndexUnarchiver.scala b/core/src/main/scala/codesearch/core/meta/unarchiver/RubyIndexUnarchiver.scala new file mode 100644 index 0000000..2c9baa0 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/unarchiver/RubyIndexUnarchiver.scala @@ -0,0 +1,47 @@ +package codesearch.core.meta.unarchiver + +import java.nio.file.Path + +import cats.effect.{ContextShift, Sync} +import cats.syntax.functor._ +import codesearch.core.BlockingEC +import codesearch.core.config.RubyConfig +import codesearch.core.db.repository.PackageIndexTableRow +import fs2.Stream +import fs2.io.file +import io.circe.fs2.{byteArrayParser, decoder} + +import scala.sys.process._ + +private[meta] final class RubyIndexUnarchiver[F[_]: Sync: ContextShift]( + config: RubyConfig +) extends StreamIndexUnarchiver[F] { + + def unarchiveToStream(path: Path): F[Stream[F, PackageIndexTableRow]] = { + for { + _ <- Sync[F].delay { + Seq( + "ruby", + config.scriptPath.toString, + path.toString, + config.repoJsonPath.toString + ) !! 
+ } + } yield flatPackages + } + + private def flatPackages: F[Stream[F, PackageIndexTableRow]] = { + Sync[F].delay( + file + .readAll[F](config.repoJsonPath, BlockingEC, 4096) + .through(byteArrayParser) + .through(decoder[F, Seq[String]]) + .collect { case Seq(name, version, _) => PackageIndexTableRow(name, version, config.repository) }) + } +} + +private[meta] object RubyIndexUnarchiver { + def apply[F[_]: Sync: ContextShift]( + config: RubyConfig + ): RubyIndexUnarchiver[F] = new RubyIndexUnarchiver(config) +} diff --git a/core/src/main/scala/codesearch/core/meta/unarchiver/RustIndexUnarchiver.scala b/core/src/main/scala/codesearch/core/meta/unarchiver/RustIndexUnarchiver.scala new file mode 100644 index 0000000..9f29090 --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/unarchiver/RustIndexUnarchiver.scala @@ -0,0 +1,49 @@ +package codesearch.core.meta.unarchiver + +import java.nio.file.Path + +import cats.effect.Sync +import cats.syntax.functor._ +import codesearch.core.config.RustConfig +import codesearch.core.db.repository.PackageIndexTableRow +import codesearch.core.util.{FsUtils, Unarchiver} +import fs2.Stream +import io.circe.Decoder +import io.circe.fs2._ +import org.rauschig.jarchivelib.ArchiveFormat.ZIP + +private[meta] final class RustIndexUnarchiver[F[_]: Sync]( + unarchiver: Unarchiver[F], + config: RustConfig +) extends StreamIndexUnarchiver[F] { + + private implicit val packageDecoder: Decoder[PackageIndexTableRow] = { cursor => + for { + name <- cursor.get[String]("name") + version <- cursor.get[String]("vers") + } yield PackageIndexTableRow(name, version, config.repository) + } + + def unarchiveToStream(path: Path): F[Stream[F, PackageIndexTableRow]] = { + for { + _ <- unarchiver.extract(path, config.repoPath, ZIP) + } yield flatPackages + } + + private def flatPackages: F[Stream[F, PackageIndexTableRow]] = { + Sync[F].delay( + FsUtils + .recursiveListFiles(config.repoPath.toFile) + .filter(file => 
!config.ignoreFiles.contains(file.getName)) + .evalMap(file => FsUtils.readFileAsync(file.getAbsolutePath).map(_.last)) + .through(stringStreamParser) + .through(decoder[F, PackageIndexTableRow])) + } +} + +private[meta] object RustIndexUnarchiver { + def apply[F[_]: Sync]( + unarchiver: Unarchiver[F], + config: RustConfig + ): RustIndexUnarchiver[F] = new RustIndexUnarchiver(unarchiver, config) +} diff --git a/core/src/main/scala/codesearch/core/meta/unarchiver/StreamIndexUnarchiver.scala b/core/src/main/scala/codesearch/core/meta/unarchiver/StreamIndexUnarchiver.scala new file mode 100644 index 0000000..319606b --- /dev/null +++ b/core/src/main/scala/codesearch/core/meta/unarchiver/StreamIndexUnarchiver.scala @@ -0,0 +1,10 @@ +package codesearch.core.meta.unarchiver + +import java.nio.file.Path + +import codesearch.core.db.repository.PackageIndexTableRow +import fs2.Stream + +private[meta] trait StreamIndexUnarchiver[F[_]] { + def unarchiveToStream(path: Path): F[Stream[F, PackageIndexTableRow]] +} diff --git a/core/src/main/scala/codesearch/core/model/CratesTable.scala b/core/src/main/scala/codesearch/core/model/CratesTable.scala index a5447af..8e9c689 100644 --- a/core/src/main/scala/codesearch/core/model/CratesTable.scala +++ b/core/src/main/scala/codesearch/core/model/CratesTable.scala @@ -2,4 +2,4 @@ package codesearch.core.model import slick.jdbc.PostgresProfile.api._ -class CratesTable(tag: Tag) extends DefaultTable(tag, "CRATES") {} +class CratesTable(tag: Tag) extends DefaultTable(tag, "CRATES") diff --git a/core/src/main/scala/codesearch/core/model/DefaultTable.scala b/core/src/main/scala/codesearch/core/model/DefaultTable.scala index ad3bcde..0759ea1 100644 --- a/core/src/main/scala/codesearch/core/model/DefaultTable.scala +++ b/core/src/main/scala/codesearch/core/model/DefaultTable.scala @@ -7,13 +7,9 @@ import slick.jdbc.PostgresProfile.api._ // TODO: UTC class DefaultTable(tag: Tag, tableName: String) extends Table[(String, String, Timestamp)](tag, 
tableName) { - def packageName = column[String](s"${tableName}_PACKAGE_NAME", O.PrimaryKey) - def lastVersion = column[String](s"${tableName}_VERSION") - - def updated = column[Timestamp](s"${tableName}_UPDATED") - def * = (packageName, lastVersion, updated) - + def packageName = column[String](s"${tableName}_PACKAGE_NAME", O.PrimaryKey) + def lastVersion = column[String](s"${tableName}_VERSION") + def updated = column[Timestamp](s"${tableName}_UPDATED") def indexTimestamps = index(s"${tableName}_LAST_UPDATED", updated) - } diff --git a/core/src/main/scala/codesearch/core/model/GemTable.scala b/core/src/main/scala/codesearch/core/model/GemTable.scala index 0832e53..63cac4a 100644 --- a/core/src/main/scala/codesearch/core/model/GemTable.scala +++ b/core/src/main/scala/codesearch/core/model/GemTable.scala @@ -2,4 +2,4 @@ package codesearch.core.model import slick.jdbc.PostgresProfile.api._ -class GemTable(tag: Tag) extends DefaultTable(tag, "GEM") {} +class GemTable(tag: Tag) extends DefaultTable(tag, "GEM") diff --git a/core/src/main/scala/codesearch/core/model/HackageTable.scala b/core/src/main/scala/codesearch/core/model/HackageTable.scala index 1c70269..24b87df 100644 --- a/core/src/main/scala/codesearch/core/model/HackageTable.scala +++ b/core/src/main/scala/codesearch/core/model/HackageTable.scala @@ -2,4 +2,4 @@ package codesearch.core.model import slick.jdbc.PostgresProfile.api._ -class HackageTable(tag: Tag) extends DefaultTable(tag, "HACKAGE") {} +class HackageTable(tag: Tag) extends DefaultTable(tag, "HACKAGE") diff --git a/core/src/main/scala/codesearch/core/model/NpmTable.scala b/core/src/main/scala/codesearch/core/model/NpmTable.scala index f9a92fb..196494e 100644 --- a/core/src/main/scala/codesearch/core/model/NpmTable.scala +++ b/core/src/main/scala/codesearch/core/model/NpmTable.scala @@ -2,4 +2,4 @@ package codesearch.core.model import slick.jdbc.PostgresProfile.api._ -class NpmTable(tag: Tag) extends DefaultTable(tag, "NPM") {} +class NpmTable(tag: 
Tag) extends DefaultTable(tag, "NPM") diff --git a/core/src/main/scala/codesearch/core/model/Version.scala b/core/src/main/scala/codesearch/core/model/Version.scala index 719996a..8ac083b 100644 --- a/core/src/main/scala/codesearch/core/model/Version.scala +++ b/core/src/main/scala/codesearch/core/model/Version.scala @@ -1,10 +1,9 @@ package codesearch.core.model -case class Version(verString: String) extends Ordered[Version] { - import scala.math.Ordered.orderingToOrdered +import scala.math.Ordered.orderingToOrdered +case class Version(verString: String) extends Ordered[Version] { val version: Iterable[Long] = ("""\d+""".r findAllIn verString).toSeq.map(_.toLong) - override def compare(that: Version): Int = this.version compare that.version } diff --git a/core/src/main/scala/codesearch/core/search/SearchRequest.scala b/core/src/main/scala/codesearch/core/search/SearchRequest.scala index 82e3025..ba4e073 100644 --- a/core/src/main/scala/codesearch/core/search/SearchRequest.scala +++ b/core/src/main/scala/codesearch/core/search/SearchRequest.scala @@ -21,7 +21,9 @@ case class SearchRequest( spaceInsensitive: Boolean, preciseMatch: Boolean, sourcesOnly: Boolean, - page: Int + excludeTests: Boolean, + page: Int, + limit: Int ) { /** @@ -46,7 +48,9 @@ object SearchRequest { spaceInsensitive: String, preciseMatch: String, sourcesOnly: String, - page: String + excludeTests: String, + page: String, + limit: String ): SearchRequest = { SearchRequest( lang, @@ -57,7 +61,9 @@ object SearchRequest { isEnabled(spaceInsensitive), isEnabled(preciseMatch), isEnabled(sourcesOnly), + isEnabled(excludeTests), page.toInt, + limit.toInt ) } diff --git a/core/src/main/scala/codesearch/core/search/engine/CodeSearcher.scala b/core/src/main/scala/codesearch/core/search/engine/CodeSearcher.scala new file mode 100644 index 0000000..99beb8c --- /dev/null +++ b/core/src/main/scala/codesearch/core/search/engine/CodeSearcher.scala @@ -0,0 +1,25 @@ +package codesearch.core.search.engine + 
+import cats.effect.Sync +import codesearch.core.search.SearchRequest +import codesearch.core.search.engine.csearch.MatchedRow +import cats.syntax.flatMap._ +import codesearch.core.config.SnippetConfig +import fs2.Stream + +sealed trait Response +case class ErrorResponse(message: String) extends Response +case class SuccessfulResponse[T](value: T) extends Response + +object CodeSearcher { + def apply[F[_]: Sync]( + csearchProvider: SearchProvider[F, SearchRequest, Stream[F, MatchedRow]], + snippetConfig: SnippetConfig + ): SearchProvider[F, SearchRequest, Response] = (request: SearchRequest) => { + val snippetGrouper = StreamSnippetGrouper(snippetConfig) + val matchedRows = csearchProvider.searchBy(request) + val a = matchedRows.flatMap { rows => + rows.through(snippetGrouper.group).through() + } + } +} diff --git a/core/src/main/scala/codesearch/core/search/engine/SearchProvider.scala b/core/src/main/scala/codesearch/core/search/engine/SearchProvider.scala new file mode 100644 index 0000000..76f99ae --- /dev/null +++ b/core/src/main/scala/codesearch/core/search/engine/SearchProvider.scala @@ -0,0 +1,5 @@ +package codesearch.core.search.engine + +trait SearchProvider[F[_], QueryParam, Result] { + def searchBy(param: QueryParam): F[Result] +} \ No newline at end of file diff --git a/core/src/main/scala/codesearch/core/search/engine/StreamSnippetGrouper.scala b/core/src/main/scala/codesearch/core/search/engine/StreamSnippetGrouper.scala new file mode 100644 index 0000000..8183c53 --- /dev/null +++ b/core/src/main/scala/codesearch/core/search/engine/StreamSnippetGrouper.scala @@ -0,0 +1,47 @@ +package codesearch.core.search.engine + +import cats.Applicative +import cats.data.NonEmptyVector +import codesearch.core.config.SnippetConfig +import codesearch.core.search.engine.csearch.MatchedRow +import fs2._ +import cats.instances.string._ + +/** + * Info about code snippet + * + * @param filePath absolute path to file + * @param lines numbers of matched lines in file + */ 
+case class SnippetInfo(filePath: String, lines: NonEmptyVector[Int]) + +final class StreamSnippetGrouper[F[_]: Applicative](config: SnippetConfig) { + + def group: Pipe[F, MatchedRow, SnippetInfo] = { matchedRows => + for { + (_, matchedRow) <- matchedRows.groupAdjacentBy(_.path) + snippets <- Stream.emits(groupToSnippets(matchedRow)) + } yield snippets + + def groupToSnippets(rows: Chunk[MatchedRow]): Seq[SnippetInfo] = { + rows.foldLeft(Vector.empty[SnippetInfo]) { (snippets, row) => + snippets.lastOption match { + case Some(snippet) => + if (row.lineNumber < snippet.lines.last + config.linesAfter) + snippets.init :+ snippet.copy(lines = snippet.lines :+ row.lineNumber) + else + snippets :+ SnippetInfo(row.path, NonEmptyVector.one(row.lineNumber)) + case None => + snippets :+ SnippetInfo(row.path, NonEmptyVector.one(row.lineNumber)) + } + } + } + } + +} + +object StreamSnippetGrouper { + def apply[F[_]: Applicative]( + config: SnippetConfig + ): StreamSnippetGrouper[F] = new StreamSnippetGrouper[F](config) +} diff --git a/core/src/main/scala/codesearch/core/search/engine/csearch/CsearchProvider.scala b/core/src/main/scala/codesearch/core/search/engine/csearch/CsearchProvider.scala new file mode 100644 index 0000000..902355e --- /dev/null +++ b/core/src/main/scala/codesearch/core/search/engine/csearch/CsearchProvider.scala @@ -0,0 +1,66 @@ +package codesearch.core.search.engine.csearch + +import cats.effect.Sync +import cats.syntax.flatMap._ +import cats.syntax.functor._ +import cats.syntax.applicative._ +import codesearch.core.config.{CindexConfig, SourcesFilesConfig} +import codesearch.core.regex.RegexConstructor +import codesearch.core.search.SearchRequest +import codesearch.core.search.engine.SearchProvider +import fs2.{Pipe, Stream} +import io.chrisdavenport.log4cats.Logger + +import scala.sys.process.Process + +case class MatchedRow(path: String, lineNumber: Int) + +object CsearchProvider { + def apply[F[_]: Sync]( + sourcesFilesConfig: SourcesFilesConfig, 
+ cindexConfig: CindexConfig, + logger: Logger[F] + ): SearchProvider[F, SearchRequest, Stream[F, MatchedRow]] = (request: SearchRequest) => { + + val indexDir = cindexConfig.indexDir + val environment = ("CSEARCHINDEX", indexDir) + val pipe = Seq("head", s"-${request.limit}") + val process = Process(arguments(request), None, environment) #| pipe + + def parse: Pipe[F, String, MatchedRow] = { lines => + lines.map { row => + val Array(path, lineNumber) = row.split(":").take(2) //filePath:lineNumber:matchedString + MatchedRow(path, lineNumber.toInt) + } + } + + def arguments(request: SearchRequest): Seq[String] = { + val searchInFilesRegexp: String = request.filePath match { + case Some(filePath) => filePath + case None => + if (request.sourcesOnly) { + val sourcesExtensionsRegexp = + sourcesFilesConfig.filesExtensions.sourceExtensions.mkString(".*\\.(", "|", ")$") + if (request.excludeTests) { + val excludedTestDirsRegexp = sourcesFilesConfig.testDirsNames.mkString("^(?!.*(", "|", "))") + excludedTestDirsRegexp + sourcesExtensionsRegexp + } else sourcesExtensionsRegexp + } else ".*" + } + + val queryRegex = + RegexConstructor(request.query, request.insensitive, request.spaceInsensitive, request.preciseMatch) + + request.filter match { + case Some(filter) => Seq("csearch", "-n", "-f", searchInFilesRegexp, queryRegex, filter) + case None => Seq("csearch", "-n", "-f", searchInFilesRegexp, queryRegex) + } + } + + for { + _ <- logger.debug(s"running CSEARCHINDEX=$indexDir ${arguments(request).mkString(" ")}") + resultRows <- Sync[F].delay(process.lineStream.toList) + parsedResultRows <- Stream.emits(resultRows).through(parse).pure[F] + } yield parsedResultRows + } +} diff --git a/core/src/main/scala/codesearch/core/sources/HaskellPackageSourcesUpdater.scala b/core/src/main/scala/codesearch/core/sources/HaskellPackageSourcesUpdater.scala new file mode 100644 index 0000000..fee5495 --- /dev/null +++ 
b/core/src/main/scala/codesearch/core/sources/HaskellPackageSourcesUpdater.scala @@ -0,0 +1,5 @@ +package codesearch.core.sources + +final class HaskellPackageSourcesUpdater[F[_]] extends SourcesUpdater[F] { + def update: F[Unit] = +} diff --git a/core/src/main/scala/codesearch/core/sources/PackageSourcesUpdater.scala b/core/src/main/scala/codesearch/core/sources/PackageSourcesUpdater.scala new file mode 100644 index 0000000..9808734 --- /dev/null +++ b/core/src/main/scala/codesearch/core/sources/PackageSourcesUpdater.scala @@ -0,0 +1,45 @@ +package codesearch.core.sources + +import cats.effect.concurrent.Deferred +import cats.effect.{Concurrent, Sync, Timer} +import codesearch.core.config.{RateLimiterConfig, RepositoryConfig} +import codesearch.core.db.repository.PackageIndexRepository +import codesearch.core.sources.downloader.SourcesDownloader +import upperbound.syntax.rate._ +import fs2.Stream +import upperbound.{Limiter, Rate} +import cats.syntax.all._ + +import scala.concurrent.duration._ + +trait SourcesUpdater[F[_]] { + def update: F[Unit] +} + +class PackageSourcesUpdater[F[_]: Concurrent: Timer]( + indexDbRepository: PackageIndexRepository[F], + downloader: SourcesDownloader[F], + rateLimiterConfig: Option[RateLimiterConfig] = None +) extends SourcesUpdater[F] { + + def update: F[Unit] = { + val latestPackages = indexDbRepository.findLatestByRepository(config.repository) + rateLimiterConfig match { + case Some(rate) => + } + + for { + d <- Deferred[F, A] + } Limiter.start[F](maxRate = 10 every 1.seconds).use { limiter => + limiter.submit() + + } + + } + + private def updateStream: Stream[F, F[Unit]] = + indexDbRepository + .findLatestByRepository(config.repository) + .map(downloader.download) + +} diff --git a/core/src/main/scala/codesearch/core/sources/downloader/HaskellSourcesDownloader.scala b/core/src/main/scala/codesearch/core/sources/downloader/HaskellSourcesDownloader.scala new file mode 100644 index 0000000..2b1f108 --- /dev/null +++ 
b/core/src/main/scala/codesearch/core/sources/downloader/HaskellSourcesDownloader.scala @@ -0,0 +1,30 @@ +package codesearch.core.sources.downloader + +import cats.effect.Sync +import cats.syntax.functor._ +import codesearch.core.config.PackageDownloaderConfig +import codesearch.core.db.repository.PackageDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.index.repository.Extensions.HaskellExtensions +import codesearch.core.sources.filter.FileFilter +import codesearch.core.sources.unarchiver.SourcesUnarchiver +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object HaskellSourcesDownloader { + def apply[F[_]: Sync]( + downloader: Downloader[F], + packageDbRepository: PackageDbRepository[F], + downloaderConfig: PackageDownloaderConfig + ): F[SourcesDownloader[F]] = + for { + logger <- Slf4jLogger.create + } yield + SourcesDownloader( + downloader, + SourcesUnarchiver[F], + FileFilter[F](HaskellExtensions, downloaderConfig.filterConfig.allowedFileNames), + packageDbRepository, + downloaderConfig, + logger + ) +} diff --git a/core/src/main/scala/codesearch/core/sources/downloader/JavaScriptSourcesDownloader.scala b/core/src/main/scala/codesearch/core/sources/downloader/JavaScriptSourcesDownloader.scala new file mode 100644 index 0000000..bedfb3f --- /dev/null +++ b/core/src/main/scala/codesearch/core/sources/downloader/JavaScriptSourcesDownloader.scala @@ -0,0 +1,30 @@ +package codesearch.core.sources.downloader + +import cats.effect.Sync +import cats.syntax.functor._ +import codesearch.core.config.PackageDownloaderConfig +import codesearch.core.db.repository.PackageDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.index.repository.Extensions.JavaScriptExtensions +import codesearch.core.sources.filter.FileFilter +import codesearch.core.sources.unarchiver.SourcesUnarchiver +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object JavaScriptSourcesDownloader { + def apply[F[_]: Sync]( 
+ downloader: Downloader[F], + packageDbRepository: PackageDbRepository[F], + downloaderConfig: PackageDownloaderConfig + ): F[SourcesDownloader[F]] = + for { + logger <- Slf4jLogger.create + } yield + SourcesDownloader( + downloader, + SourcesUnarchiver[F], + FileFilter[F](JavaScriptExtensions, downloaderConfig.filterConfig.allowedFileNames), + packageDbRepository, + downloaderConfig, + logger + ) +} diff --git a/core/src/main/scala/codesearch/core/sources/downloader/RubySourcesDownloader.scala b/core/src/main/scala/codesearch/core/sources/downloader/RubySourcesDownloader.scala new file mode 100644 index 0000000..87aac24 --- /dev/null +++ b/core/src/main/scala/codesearch/core/sources/downloader/RubySourcesDownloader.scala @@ -0,0 +1,30 @@ +package codesearch.core.sources.downloader + +import cats.effect.Sync +import cats.syntax.functor._ +import codesearch.core.config.PackageDownloaderConfig +import codesearch.core.db.repository.PackageDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.index.repository.Extensions.RubyExtensions +import codesearch.core.sources.filter.FileFilter +import codesearch.core.sources.unarchiver.RubySourcesUnarchiver +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object RubySourcesDownloader { + def apply[F[_]: Sync]( + downloader: Downloader[F], + packageDbRepository: PackageDbRepository[F], + downloaderConfig: PackageDownloaderConfig + ): F[SourcesDownloader[F]] = + for { + logger <- Slf4jLogger.create + } yield + SourcesDownloader( + downloader, + RubySourcesUnarchiver[F], + FileFilter[F](RubyExtensions, downloaderConfig.filterConfig.allowedFileNames), + packageDbRepository, + downloaderConfig, + logger + ) +} diff --git a/core/src/main/scala/codesearch/core/sources/downloader/RustSourcesDownloader.scala b/core/src/main/scala/codesearch/core/sources/downloader/RustSourcesDownloader.scala new file mode 100644 index 0000000..e31b495 --- /dev/null +++ 
b/core/src/main/scala/codesearch/core/sources/downloader/RustSourcesDownloader.scala @@ -0,0 +1,30 @@ +package codesearch.core.sources.downloader + +import cats.effect.Sync +import cats.syntax.functor._ +import codesearch.core.config.PackageDownloaderConfig +import codesearch.core.db.repository.PackageDbRepository +import codesearch.core.index.repository.Downloader +import codesearch.core.index.repository.Extensions.RustExtensions +import codesearch.core.sources.filter.FileFilter +import codesearch.core.sources.unarchiver.SourcesUnarchiver +import io.chrisdavenport.log4cats.slf4j.Slf4jLogger + +object RustSourcesDownloader { + def apply[F[_]: Sync]( + downloader: Downloader[F], + packageDbRepository: PackageDbRepository[F], + downloaderConfig: PackageDownloaderConfig + ): F[SourcesDownloader[F]] = + for { + logger <- Slf4jLogger.create + } yield + SourcesDownloader( + downloader, + SourcesUnarchiver[F], + FileFilter[F](RustExtensions, downloaderConfig.filterConfig.allowedFileNames), + packageDbRepository, + downloaderConfig, + logger + ) +} diff --git a/core/src/main/scala/codesearch/core/sources/downloader/SourcesDownloader.scala b/core/src/main/scala/codesearch/core/sources/downloader/SourcesDownloader.scala new file mode 100644 index 0000000..4cc39d1 --- /dev/null +++ b/core/src/main/scala/codesearch/core/sources/downloader/SourcesDownloader.scala @@ -0,0 +1,40 @@ +package codesearch.core.sources.downloader + +import java.nio.file.Paths + +import cats.effect.Sync +import cats.syntax.flatMap._ +import cats.syntax.functor._ +import codesearch.core.config.PackageDownloaderConfig +import codesearch.core.db.repository.{PackageDbRepository, PackageIndexTableRow} +import codesearch.core.index.repository.Downloader +import codesearch.core.sources.filter.FileFilter +import codesearch.core.sources.unarchiver.SourcesUnarchiver +import com.softwaremill.sttp._ +import io.chrisdavenport.log4cats.Logger + +trait SourcesDownloader[F[_]] { + def download(index: 
PackageIndexTableRow): F[Unit] +} + +object SourcesDownloader { + def apply[F[_]: Sync]( + downloader: Downloader[F], + unarchiver: SourcesUnarchiver[F], + fileFilter: FileFilter[F], + packageDbRepository: PackageDbRepository[F], + config: PackageDownloaderConfig, + logger: Logger[F] + ): SourcesDownloader[F] = (index: PackageIndexTableRow) => { + val packageUrl = Uri(config.packageUrl.format(index.name, index.version)) + val archivePath = Paths.get(config.packageArchivePath.format(index.name, index.version)) + val sourcesPath = Paths.get(config.packageSourcesPath.format(index.name, index.version)) + for { + _ <- logger.info(s"Downloading ${index.name}-${index.version} sources") + archive <- downloader.download(packageUrl, archivePath) + sourcesDir <- unarchiver.unarchive(archive, sourcesPath) + _ <- fileFilter.filter(sourcesDir) + _ <- packageDbRepository.upsert(index.name, index.version, index.repository) + } yield () + } +} diff --git a/core/src/main/scala/codesearch/core/sources/filter/FileFilter.scala b/core/src/main/scala/codesearch/core/sources/filter/FileFilter.scala new file mode 100644 index 0000000..2bd03ad --- /dev/null +++ b/core/src/main/scala/codesearch/core/sources/filter/FileFilter.scala @@ -0,0 +1,45 @@ +package codesearch.core.sources.filter + +import java.io.File +import java.nio.file.Path + +import cats.effect.Sync +import cats.instances.list._ +import cats.syntax.applicative._ +import cats.syntax.flatMap._ +import cats.syntax.functor._ +import cats.syntax.traverse._ +import codesearch.core.index.repository.Extensions +import org.apache.commons.io.FilenameUtils.getExtension + +trait FileFilter[F[_]] { + def filter(dir: Path): F[Int] +} + +object FileFilter { + def apply[F[_]: Sync]( + extensions: Extensions, + allowedFileNames: Set[String] + ): FileFilter[F] = new FileFilter[F] { + + private val maxFileSize: Int = 1024 * 1024 + + def filter(dir: Path): F[Int] = Sync[F].delay(filterRecursively(dir.toFile, filter)) + + private def 
filterRecursively(dir: File, predicate: File => Boolean): F[Int] = { + for { + (dirs, files) <- Sync[F].delay(dir.listFiles.toList.partition(_.isDirectory)) + filesDeleted <- files.filterNot(predicate).traverse(file => Sync[F].delay(file.delete)).map(_.size) + nestedFilesDeleted <- dirs.traverse(dir => filterRecursively(dir, predicate)).map(_.size) + _ <- Sync[F].delay(dir.delete).whenA(dir.listFiles.isEmpty) + } yield filesDeleted + nestedFilesDeleted + } + + private def filter(file: File): Boolean = { + val fileName = file.getName.toLowerCase + val fileExt = getExtension(fileName) + (if (fileExt.isEmpty) allowedFileNames.contains(fileName) + else extensions.extensions.contains(fileExt)) && file.length < maxFileSize + } + } +} diff --git a/core/src/main/scala/codesearch/core/sources/unarchiver/RubySourcesUnarchiver.scala b/core/src/main/scala/codesearch/core/sources/unarchiver/RubySourcesUnarchiver.scala new file mode 100644 index 0000000..30f806c --- /dev/null +++ b/core/src/main/scala/codesearch/core/sources/unarchiver/RubySourcesUnarchiver.scala @@ -0,0 +1,22 @@ +package codesearch.core.sources.unarchiver + +import java.nio.file.Path + +import cats.effect.Sync +import org.rauschig.jarchivelib.ArchiveFormat.TAR +import org.rauschig.jarchivelib.ArchiverFactory +import org.rauschig.jarchivelib.CompressionType.GZIP + +object RubySourcesUnarchiver { + def apply[F[_]: Sync]: SourcesUnarchiver[F] = + (archive: Path, directory: Path) => + Sync[F].delay { + val destDir = directory.toFile + val allowedSet = Set("tgz", "tar.gz") + ArchiverFactory.createArchiver(TAR).extract(archive.toFile, destDir) + destDir.listFiles + .filter(file => allowedSet.exists(file.getName.toLowerCase.endsWith)) + .foreach(file => ArchiverFactory.createArchiver(TAR, GZIP).extract(file, destDir)) + directory + } +} diff --git a/core/src/main/scala/codesearch/core/sources/unarchiver/SourcesUnarchiver.scala b/core/src/main/scala/codesearch/core/sources/unarchiver/SourcesUnarchiver.scala new file 
mode 100644 index 0000000..852d2d6 --- /dev/null +++ b/core/src/main/scala/codesearch/core/sources/unarchiver/SourcesUnarchiver.scala @@ -0,0 +1,44 @@ +package codesearch.core.sources.unarchiver + +import java.nio.file.Path + +import cats.effect.Sync +import cats.syntax.flatMap._ +import cats.syntax.functor._ +import org.apache.commons.io.FileUtils.{moveDirectoryToDirectory, moveFileToDirectory} +import org.rauschig.jarchivelib.ArchiveFormat.TAR +import org.rauschig.jarchivelib.ArchiverFactory +import org.rauschig.jarchivelib.CompressionType.GZIP + +trait SourcesUnarchiver[F[_]] { + + /** Return directory containing all unarchived files and directories + * + * @param archive is file to unarchiving + * @param directory is target directory + * @return directory containing all unarchived files and directories + */ + def unarchive(archive: Path, directory: Path): F[Path] +} + +object SourcesUnarchiver { + def apply[F[_]: Sync]: SourcesUnarchiver[F] = new SourcesUnarchiver[F]() { + def unarchive(archive: Path, directory: Path): F[Path] = { + for { + _ <- Sync[F].delay(ArchiverFactory.createArchiver(TAR, GZIP).extract(archive.toFile, directory.toFile)) + path <- flatDir(directory) + } yield path + } + + private def flatDir(unarchived: Path): F[Path] = Sync[F].delay { + val dir = unarchived.toFile + val notCreateDestDir = false + dir.listFiles + .filter(_.isDirectory) + .foreach(_.listFiles.foreach(file => + if (file.isDirectory) moveDirectoryToDirectory(file, dir, notCreateDestDir) + else moveFileToDirectory(file, dir, notCreateDestDir))) + unarchived + } + } +} diff --git a/core/src/main/scala/codesearch/core/syntax/path.scala b/core/src/main/scala/codesearch/core/syntax/path.scala index f2a1436..4cb38eb 100644 --- a/core/src/main/scala/codesearch/core/syntax/path.scala +++ b/core/src/main/scala/codesearch/core/syntax/path.scala @@ -2,6 +2,7 @@ package codesearch.core.syntax import java.nio.file.{Path, Paths} object path { + implicit final def string2Path(pathString: 
String): Path = Paths.get(pathString) implicit final class RichNioPath(private val parent: Path) extends AnyVal { def /(child: Path): Path = Paths.get(parent.toFile.getPath, child.toFile.getPath) def /(child: String): Path = Paths.get(parent.toFile.getPath, child) diff --git a/core/src/main/scala/codesearch/core/util/FsUtils.scala b/core/src/main/scala/codesearch/core/util/FsUtils.scala new file mode 100644 index 0000000..001599f --- /dev/null +++ b/core/src/main/scala/codesearch/core/util/FsUtils.scala @@ -0,0 +1,23 @@ +package codesearch.core.util + +import java.io.File + +import cats.effect.{Resource, Sync} +import fs2.{Chunk, Stream} + +import scala.io.Source + +object FsUtils { + + def recursiveListFiles[F[_]: Sync](cur: File): Stream[F, File] = { + val stream = Stream.evalUnChunk(Sync[F].delay(Chunk.array(cur.listFiles))) + val files = stream.filter(_.isFile) + val filesFromDirs = stream.filter(_.isDirectory).flatMap(recursiveListFiles) + files ++ filesFromDirs + } + + def readFileAsync[F[_]: Sync](path: String): F[List[String]] = + Resource + .fromAutoCloseable(Sync[F].delay(Source.fromFile(path, "UTF-8"))) + .use(source => Sync[F].delay(source.getLines.toList)) +} diff --git a/core/src/main/scala/codesearch/core/util/Unarchiver.scala b/core/src/main/scala/codesearch/core/util/Unarchiver.scala index 0110e5e..de4dc3b 100644 --- a/core/src/main/scala/codesearch/core/util/Unarchiver.scala +++ b/core/src/main/scala/codesearch/core/util/Unarchiver.scala @@ -21,19 +21,19 @@ trait Unarchiver[F[_]] { } object Unarchiver { - def apply[F[_]](implicit F: Sync[F]): Unarchiver[F] = new Unarchiver[F] { + def apply[F[_]: Sync]: Unarchiver[F] = new Unarchiver[F] { def extract( archive: Path, to: Path, format: ArchiveFormat, compressionType: CompressionType - ): F[Unit] = F.delay { + ): F[Unit] = Sync[F].delay { ArchiverFactory .createArchiver(format, compressionType) .extract(archive.toFile, to.toFile) } - def extract(archive: Path, to: Path, archiveFormat: ArchiveFormat): 
F[Unit] = F.delay { + def extract(archive: Path, to: Path, archiveFormat: ArchiveFormat): F[Unit] = Sync[F].delay { ArchiverFactory .createArchiver(archiveFormat) .extract(archive.toFile, to.toFile) diff --git a/core/src/main/scala/codesearch/core/util/manatki/Raise.scala b/core/src/main/scala/codesearch/core/util/manatki/Raise.scala new file mode 100644 index 0000000..22de29d --- /dev/null +++ b/core/src/main/scala/codesearch/core/util/manatki/Raise.scala @@ -0,0 +1,16 @@ +package codesearch.core.util.manatki + +import cats.ApplicativeError + +trait Raise[F[_], E] { + def raise[A](err: E): F[A] +} + +object Raise { + implicit def raiseApplicativeError[F[_], E, E1]( + implicit appErr: ApplicativeError[F, E], + sub: E1 <:< E + ): Raise[F, E1] = new Raise[F, E1] { + override def raise[A](err: E1): F[A] = appErr.raiseError(err) + } +} diff --git a/core/src/main/scala/codesearch/core/util/manatki/syntax/raise.scala b/core/src/main/scala/codesearch/core/util/manatki/syntax/raise.scala new file mode 100644 index 0000000..695fa03 --- /dev/null +++ b/core/src/main/scala/codesearch/core/util/manatki/syntax/raise.scala @@ -0,0 +1,30 @@ +package codesearch.core.util.manatki.syntax + +import cats.Applicative +import codesearch.core.util.manatki.Raise + +object raise { + final implicit class RaiseOps[E](val err: E) extends AnyVal { + def raise[F[_], A](implicit raise: Raise[F, E]): F[A] = raise.raise(err) + } + + final implicit class RaiseOptionOps[A](val opt: Option[A]) extends AnyVal { + def liftTo[F[_]] = new RaiseLiftToApplied[F, A](opt) + } + + final implicit class RaiseEitherOps[E, A](val either: Either[E, A]) extends AnyVal { + def toRaise[F[_]](implicit app: Applicative[F], raise: Raise[F, E]): F[A] = + either match { + case Left(err) => raise.raise(err) + case Right(value) => app.pure(value) + } + } + + class RaiseLiftToApplied[F[_], A](val opt: Option[A]) extends AnyVal { + def apply[E](err: => E)(implicit raise: Raise[F, E], app: Applicative[F]): F[A] = + opt match { 
+ case None => raise.raise(err) + case Some(a) => app.pure(a) + } + } +} \ No newline at end of file diff --git a/project/Builder.scala b/project/Builder.scala index e39d39e..0e3414c 100644 --- a/project/Builder.scala +++ b/project/Builder.scala @@ -63,10 +63,11 @@ object Builder { libraryDependencies ++= Seq( "com.typesafe.slick" %% "slick" % "3.2.3", "com.typesafe.slick" %% "slick-hikaricp" % "3.2.3", + "com.github.tminglei" %% "slick-pg" % "0.17.2", "org.postgresql" % "postgresql" % "42.2.2", "com.softwaremill.sttp" %% "async-http-client-backend-fs2" % "1.3.8", - "co.fs2" %% "fs2-core" % "1.0.0", - "co.fs2" %% "fs2-io" % "1.0.0", + "co.fs2" %% "fs2-core" % "1.0.4", + "co.fs2" %% "fs2-io" % "1.0.4", "io.circe" %% "circe-fs2" % "0.10.0", "io.circe" %% "circe-core" % "0.10.0", "io.circe" %% "circe-generic" % "0.10.0", @@ -75,8 +76,15 @@ object Builder { "com.github.pureconfig" %% "pureconfig" % "0.9.2", "com.github.pureconfig" %% "pureconfig-cats-effect" % "0.9.2", "io.chrisdavenport" %% "log4cats-slf4j" % "0.2.0-RC2", + "org.apache.commons" % "commons-compress" % "1.18", "org.scalactic" %% "scalactic" % "3.0.5", - "org.scalatest" %% "scalatest" % "3.0.5" % "test" + "org.scalatest" %% "scalatest" % "3.0.5" % "test", + "org.tpolecat" %% "doobie-core" % "0.6.0", + "org.tpolecat" %% "doobie-hikari" % "0.6.0", + "org.tpolecat" %% "doobie-postgres" % "0.6.0", + "org.tpolecat" %% "doobie-specs2" % "0.6.0", + "org.flywaydb" % "flyway-core" % "5.2.4", + "org.systemfw" % "upperbound_2.12" % "0.2.0-M2", ), assemblyMergeStrategy in assembly := { case PathList("META-INF", _ @_*) => MergeStrategy.discard diff --git a/scripts/deploy.sh b/scripts/deploy.sh deleted file mode 100755 index 9ba8a8c..0000000 --- a/scripts/deploy.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -ssh root@167.99.88.190 " - set -x - cd /root/aelve/codesearch - git pull - sbt web-server/assembly - systemctl restart codesearch.service -"