Skip to content
This repository was archived by the owner on Dec 11, 2019. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion core/src/main/resources/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,35 @@ db {

languagesConfig {
haskell {
repository = "hackage"
# Fetch the Hackage package index over HTTPS instead of plain HTTP.
repoIndexUrl = "https://hackage.haskell.org/packages/index.tar.gz"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change this to https?

packageUrl = "https://hackage.haskell.org/package/%1$s-%2$s/%1$s-%2$s.tar.gz"
repoArchivePath = "./data/meta/haskell/index.tar.gz"
repoPath = "./data/meta/haskell/"
concurrentTasksCount = 30
}
rust {
repository = "crates"
repoIndexUrl = "https://github.com/rust-lang/crates.io-index/archive/master.zip"
packageUrl = "https://crates.io/api/v1/crates/%s/%s/download"
repoArchivePath = "./data/meta/rust/archive.zip"
repoPath = "./data/meta/rust/"
concurrentTasksCount = 30
ignoreFiles = ["test-max-version-example-crate", "version-length-checking-is-overrated", "config.json", "archive.zip", ".git"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is config.json excluded? (Not saying it shouldn't be, just asking why)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@neongreen I can't say why it is excluded =( but it was that way originally. I will remove it, thanks.

}
ruby {
repository = "gem"
# Fetch the RubyGems spec index over HTTPS, matching the HTTPS packageUrl below.
repoIndexUrl = "https://rubygems.org/latest_specs.4.8.gz"
packageUrl = "https://rubygems.org/downloads/%s-%s.gem"
repoArchivePath = "./data/meta/ruby/ruby_index.gz"
repoJsonPath = "./data/meta/ruby/ruby_index.json"
scriptPath = "./scripts/update_index.rb"
concurrentTasksCount = 30
}
javascript {
repository = "npm"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a tab? Please no tabs

repoIndexUrl = "https://replicate.npmjs.com/_all_docs?include_docs=true"
repoJsonPath = "./data/meta/npm/npm_packages_index.json"
packageUrl = "https://registry.npmjs.org/%1$s/-/%1$s-%2$s.tgz"
concurrentTasksCount = 30
}
}
Expand Down
46 changes: 26 additions & 20 deletions core/src/main/scala/codesearch/core/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,39 @@ object Main extends IOApp {
lang: String = "all"
)

case class LangRep[A <: DefaultTable](
case class LangRep[A, F[_]](
db: DefaultDB[A],
langIndex: LanguageIndex[A],
metaDownloader: MetaDownloader[IO]
metaDownloader: MetaDownloader[F]
)

def run(args: List[String]): IO[ExitCode] =
def run(args: List[String]): IO[ExitCode] = {
Resource.make(IO(AsyncHttpClientFs2Backend[IO]()))(client => IO(client.close())).use { implicit httpClient =>
for {
params <- CLI.params(args)
config <- Config.load[IO]

unarchiver = Unarchiver[IO]
implicit0(downloader: Downloader[IO]) = Downloader.create[IO]

hackageMeta <- HackageMetaDownloader(config.languagesConfig.haskell, unarchiver, downloader)
cratesMeta <- CratesMetaDownloader(config.languagesConfig.rust, unarchiver, downloader)
gemMeta <- GemMetaDownloader(config.languagesConfig.ruby, downloader)
npmMeta <- NpmMetaDownloader(config.languagesConfig.javascript, downloader)

langReps = Map(
"haskell" -> LangRep[HackageTable](HackageDB, HaskellIndex(config), hackageMeta),
"rust" -> LangRep[CratesTable](CratesDB, RustIndex(config), cratesMeta),
"ruby" -> LangRep[GemTable](GemDB, RubyIndex(config), gemMeta),
"javascript" -> LangRep[NpmTable](NpmDB, JavaScriptIndex(config), npmMeta)
)
exitCode <- Program(langReps) >>= (_.run(params))
_ <- FlywayMigration.migrate[IO](config.db)
exitCode <- Transactor.create[IO](config.db).use { xa =>
for {
params <- CLI.params(args)

unarchiver = Unarchiver[IO]
implicit0(downloader: Downloader[IO]) = Downloader.create[IO]

hackageMeta <- HackageMetaDownloader(config.languagesConfig.haskell, unarchiver, downloader, xa)
cratesMeta <- CratesMetaDownloader(config.languagesConfig.rust, unarchiver, downloader)
gemMeta <- GemMetaDownloader(config.languagesConfig.ruby, downloader)
npmMeta <- NpmMetaDownloader(config.languagesConfig.javascript, downloader)

langReps = Map(
"haskell" -> LangRep[HackageTable, IO](HackageDB, HaskellIndex(config), hackageMeta),
"rust" -> LangRep[CratesTable, IO](CratesDB, RustIndex(config), cratesMeta),
"ruby" -> LangRep[GemTable, IO](GemDB, RubyIndex(config), gemMeta),
"javascript" -> LangRep[NpmTable, IO](NpmDB, JavaScriptIndex(config), npmMeta)
)
exitCode <- Program(langReps) >>= (_.run(params))
} yield exitCode
}
} yield exitCode
}
}
}
42 changes: 21 additions & 21 deletions core/src/main/scala/codesearch/core/Program.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,64 +3,63 @@ package codesearch.core
import cats.effect._
import cats.instances.list._
import cats.syntax.applicative._
import cats.syntax.flatMap._
import cats.syntax.foldable._
import cats.syntax.traverse._
import cats.syntax.functor._
import codesearch.core.Main.{LangRep, Params}
import codesearch.core.model.DefaultTable
import io.chrisdavenport.log4cats.Logger
import codesearch.core.util.manatki.syntax.raise._
import io.chrisdavenport.log4cats.slf4j.Slf4jLogger

class Program(langReps: Map[String, LangRep[_ <: DefaultTable]], logger: Logger[IO]) {
/**
  * Error for a language name that cannot be resolved to a known language.
  *
  * @param lang the rejected language name; it is embedded in the exception message
  */
case class InvalidLang(lang: String) extends RuntimeException(s"Unsupported language $lang")

def run(params: Params): IO[ExitCode] =
class Program[F[_]: Sync: ContextShift](
langReps: Map[String, LangRep[_ <: DefaultTable]],
logger: Logger[F]
) {

def run(params: Params): F[ExitCode] =
for {
_ <- if (params.lang == "all") {
logger.info("Codesearch-core started for all supported languages")
} else {
logger.info(s"Codesearch-core started for language ${params.lang}")
}

_ <- initDb(params).whenA(params.initDB)
_ <- downloadMeta(params).whenA(params.downloadMeta)
_ <- updatePackages(params).whenA(params.updatePackages)
_ <- buildIndex(params).whenA(params.buildIndex)

} yield ExitCode.Success

object InvalidLang extends RuntimeException(s"Unsupported language")

def findRepositories(lang: String): IO[List[LangRep[_]]] = {
def findRepositories(lang: String): F[List[LangRep[_]]] = {
if (lang == "all") {
IO.pure(langReps.values.toList)
langReps.values.toList.pure[F].widen
} else {
langReps.get(lang) match {
case Some(l) => IO.pure(List(l))
case None => IO.raiseError(InvalidLang)
case Some(l) => List(l).pure[F].widen
case None => InvalidLang(lang).raise
}
}
}

def initDb(params: Params): IO[Unit] =
for {
languages <- findRepositories(params.lang)
_ <- languages.traverse_(_.db.initDB)
} yield ()

def downloadMeta(params: Params): IO[Unit] = {
def downloadMeta(params: Params): F[Unit] = {
for {
languages <- findRepositories(params.lang)
_ <- languages.traverse_(_.metaDownloader.downloadMeta)
_ <- languages.traverse_(_.metaDownloader.download)
} yield ()
}

def updatePackages(params: Params): IO[Unit] =
def updatePackages(params: Params): F[Unit] =
for {
languages <- findRepositories(params.lang)
updated <- languages.traverse(_.langIndex.updatePackages(params.limitedCountPackages))
_ <- logger.info(s"Updated: ${updated.sum}")
} yield ()

def buildIndex(params: Params): IO[Unit] =
def buildIndex(params: Params): F[Unit] =
for {
languages <- findRepositories(params.lang)
_ <- languages.traverse_(_.langIndex.buildIndex)
Expand All @@ -69,6 +68,7 @@ class Program(langReps: Map[String, LangRep[_ <: DefaultTable]], logger: Logger[
}

object Program {
def apply(langReps: Map[String, LangRep[_ <: DefaultTable]]): IO[Program] =
Slf4jLogger.fromClass[IO](getClass).map(logger => new Program(langReps, logger))
def apply[F[_]: Sync](
langReps: Map[String, LangRep[_ <: DefaultTable]]
): F[Program[F]] = Slf4jLogger.fromClass[F](getClass).map(logger => new Program(langReps, logger))
}
7 changes: 7 additions & 0 deletions core/src/main/scala/codesearch/core/config/CindexConfig.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package codesearch.core.config

/**
  * Locations used by the cindex integration.
  *
  * NOTE(review): the field meanings below are inferred from their names only;
  * confirm against the code that consumes this config.
  *
  * @param indexDir            presumably the directory holding the active index
  * @param tempIndexDir        presumably a scratch directory used while an index is being built
  * @param packagesToIndexFile presumably a file listing the packages to index
  */
case class CindexConfig(
indexDir: String,
tempIndexDir: String,
packagesToIndexFile: String
)
59 changes: 50 additions & 9 deletions core/src/main/scala/codesearch/core/config/Config.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@ import cats.effect.Sync
import pureconfig.module.catseffect._
import pureconfig.{CamelCase, ConfigFieldMapping, ProductHint}

/**
  * Settings common to every language repository configuration.
  *
  * `repository` is the repository identifier (application.conf uses values such as
  * "hackage", "crates", "gem" and "npm"); `repoIndexUrl` is the URL of the
  * repository's package index.
  */
trait RepositoryConfig {
def repository: String
def repoIndexUrl: URI
}

/**
  * A [[RepositoryConfig]] that also carries `repoArchivePath`, a local path for the
  * index archive (application.conf uses values such as
  * "./data/meta/haskell/index.tar.gz").
  */
trait ArchivedIndexConfig extends RepositoryConfig {
def repoArchivePath: Path
}

case class Config(
db: DatabaseConfig,
snippetConfig: SnippetConfig,
Expand All @@ -19,7 +28,13 @@ case class DatabaseConfig(
port: Int,
name: String,
user: String,
password: String
password: String,
properties: DatabaseProperties
)

/**
  * Low-level database connection properties.
  *
  * @param driver JDBC driver class name (passed as the driver argument when the
  *               Hikari transactor is created)
  * @param url    JDBC connection URL (used for both the transactor and the Flyway data source)
  */
case class DatabaseProperties(
driver: String,
url: String
)

case class SnippetConfig(
Expand All @@ -36,31 +51,57 @@ case class LanguagesConfig(
)

case class HaskellConfig(
repository: String,
repoIndexUrl: URI,
repoArchivePath: Path,
repoPath: Path,
concurrentTasksCount: Int
)
downloaderConfig: PackageDownloaderConfig
) extends ArchivedIndexConfig

case class RubyConfig(
repository: String,
repoIndexUrl: URI,
repoArchivePath: Path,
repoJsonPath: Path,
scriptPath: Path,
concurrentTasksCount: Int
)
downloaderConfig: PackageDownloaderConfig
) extends ArchivedIndexConfig

case class RustConfig(
repository: String,
repoIndexUrl: URI,
repoArchivePath: Path,
repoPath: Path,
concurrentTasksCount: Int
)
ignoreFiles: Set[String],
downloaderConfig: PackageDownloaderConfig
) extends ArchivedIndexConfig

case class JavaScriptConfig(
repository: String,
repoIndexUrl: URI,
repoJsonPath: Path,
concurrentTasksCount: Int
downloaderConfig: PackageDownloaderConfig
) extends RepositoryConfig

/** Configuration for the sources updater; it currently declares no fields. */
case class SourcesUpdaterConfig()

/**
  * Settings for downloading a package's sources.
  *
  * NOTE(review): descriptions are inferred from field names and from application.conf;
  * confirm against the downloader implementation.
  *
  * @param packageUrl         package download URL; presumably a format template with
  *                           name/version placeholders, as the `packageUrl` entries in
  *                           application.conf are
  * @param packageArchivePath presumably where the downloaded archive is stored
  * @param packageSourcesPath presumably where the unpacked sources are stored
  * @param filterConfig       file-name filter settings, see [[SourcesFilterConfig]]
  */
case class PackageDownloaderConfig(
packageUrl: String,
packageArchivePath: String,
packageSourcesPath: String,
filterConfig: SourcesFilterConfig,
)

/**
  * Filter settings for package sources.
  *
  * @param allowedFileNames set of file names; presumably the names that are kept
  *                         when sources are filtered — TODO confirm against the filter
  */
case class SourcesFilterConfig(
allowedFileNames: Set[String]
)

/**
  * Additional settings for package sources.
  *
  * @param testDirs set of directory names; presumably the directories treated as
  *                 test code — TODO confirm against usages
  */
case class SourcesExtraConfig(
testDirs: Set[String],
)

/**
  * Rate-limiter settings.
  *
  * NOTE(review): presumably "at most `numberTasks` tasks per `per` time units";
  * the time unit is not visible here — confirm against the rate limiter.
  *
  * @param numberTasks number of tasks
  * @param per         length of the limiting period
  */
case class RateLimiterConfig(
numberTasks: Int,
per: Int
)

case class MetricsConfig(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package codesearch.core.config

/**
  * Settings describing the files that make up a package's sources.
  *
  * NOTE(review): descriptions are inferred from field names — confirm against usages.
  *
  * @param testDirsNames    presumably the directory names treated as test code
  * @param allowedFileNames presumably the file names accepted regardless of extension
  * @param filesExtensions  extension sets, see [[FilesExtensionsConfig]]
  */
case class SourcesFilesConfig(
testDirsNames: Set[String],
allowedFileNames: Set[String],
filesExtensions: FilesExtensionsConfig
)

/**
  * Two sets of file extensions that together form the full extension list.
  *
  * @param commonExtensions first set of extensions
  * @param sourceExtensions second set of extensions
  */
case class FilesExtensionsConfig(
    commonExtensions: Set[String],
    sourceExtensions: Set[String],
) {

  /** All configured extensions: the union of `commonExtensions` and `sourceExtensions`. */
  def extensions: Set[String] = {
    val combined = commonExtensions | sourceExtensions
    combined
  }
}
4 changes: 2 additions & 2 deletions core/src/main/scala/codesearch/core/db/DefaultDB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ trait DefaultDB[T <: DefaultTable] {
}

/**
  * Creates this repository's table if the database does not contain it yet.
  *
  * The existing tables are listed first; when none of them has this table's name,
  * the schema is created. The creation Future is lifted with `IO.fromFuture`, so the
  * returned IO completes only after the schema has been created and fails if
  * creation fails (previously the Future was started and then dropped).
  *
  * @return an IO that finishes once the table is known to exist
  */
def initDB: IO[Unit] =
  IO.fromFuture(IO(db.run(MTable.getTables))).flatMap { tables =>
    val tableExists = tables.exists(_.name.name == table.baseTableRow.tableName)
    if (tableExists) IO.unit
    else IO.fromFuture(IO(db.run(table.schema.create)))
  }
Expand Down
19 changes: 19 additions & 0 deletions core/src/main/scala/codesearch/core/db/FlywayMigration.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package codesearch.core.db

import cats.effect.Sync
import codesearch.core.config.DatabaseConfig
import org.flywaydb.core.Flyway

object FlywayMigration {

  /**
    * Runs the Flyway migrations against the configured database.
    *
    * Everything, including building the Flyway instance, is suspended in `F`,
    * so nothing runs until the returned effect is evaluated.
    *
    * @param config database settings; `properties.url`, `user` and `password`
    *               are used as the Flyway data source
    * @return an effect that performs the migration and discards Flyway's result
    */
  def migrate[F[_]: Sync](config: DatabaseConfig): F[Unit] =
    Sync[F].delay {
      val flyway = Flyway
        .configure()
        .dataSource(config.properties.url, config.user, config.password)
        .load()
      flyway.migrate()
      ()
    }
}
24 changes: 24 additions & 0 deletions core/src/main/scala/codesearch/core/db/Transactor.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package codesearch.core.db

import cats.effect.{Async, ContextShift, Resource}
import codesearch.core.config.DatabaseConfig
import doobie.hikari.HikariTransactor
import doobie.util.ExecutionContexts

object Transactor {

  /**
    * Builds a Hikari-backed doobie transactor for the configured database.
    *
    * Both execution contexts are acquired inside the same `Resource`
    * for-comprehension as the transactor, so they are part of the returned resource.
    *
    * @param config          database settings; `properties.driver`, `properties.url`,
    *                        `user` and `password` are passed to the transactor
    * @param connectPoolSize number of threads in the fixed pool passed as the
    *                        connect execution context; defaults to the previously
    *                        hard-coded 32
    * @return a resource yielding the transactor
    */
  def create[F[_]: Async: ContextShift](
      config: DatabaseConfig,
      connectPoolSize: Int = 32
  ): Resource[F, HikariTransactor[F]] =
    // Fields are accessed through `config` explicitly rather than via `import config._`,
    // which would bring every DatabaseConfig field into scope.
    for {
      connectEC     <- ExecutionContexts.fixedThreadPool[F](connectPoolSize)
      transactionEC <- ExecutionContexts.cachedThreadPool[F]
      xa <- HikariTransactor.newHikariTransactor[F](
        config.properties.driver,
        config.properties.url,
        config.user,
        config.password,
        connectEC,
        transactionEC
      )
    } yield xa
}
Loading