Commit f73c9fad authored by Bjørnar Hansen's avatar Bjørnar Hansen Committed by Bjørnar Hansen
Browse files

Open source release.

parents
dist/
.cabal-sandbox/
cabal.sandbox.config
*.swp
*.o
*.hi
*.prof
*.hp
Copyright (c) 2015, Tingtun AS, http://tingtun.no
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import Distribution.Simple
main = defaultMain
name: databus
version: 0.2.0.0
cabal-version: >=1.20
build-type: Simple
license: BSD3
license-file: LICENSE
copyright: 2015 Tingtun AS
author: Various
maintainer: bjornar.hansen@tingtun.no
homepage: http://eiii.eu
category: Web
description: EIII Databus
synopsis: The EIII databus ties together the components in the EIII project.
library
build-depends:
base >=4.7.0.2 && <4.9,
lifted-base >=0.2.3 && <0.3,
bytestring >=0.10.6 && <0.11,
lifted-async >=0.7 && <0.8,
containers >=0.5.6,
checker-common >=0.2 && <0.3,
ixset-typed,
simple-logging,
lens >=4.9 && <4.10,
msgpack >=0.7 && <1.0,
monads-tf >=0.1.0.2 && <0.2,
uuid >=1.3.11 && <1.4,
ttrpc >=0.1 && <0.2,
stm >=2.4.4 && <2.5,
time >=1.5 && <1.6,
transformers-base >=0.4.4 && <0.5,
iproute >=1.4 && <1.5,
text >=1.2.1.0 && <1.3,
postgresql-simple >=0.4.1 && <0.5,
zeromq4-haskell >=0.6.3 && <0.7,
dns >=1.4.5 && <1.5,
network-uri >=2.6 && <2.7,
binary>=0.7.3 && <0.8,
directory >=1.2.2 && <1.3,
opaleye >=0.4,
profunctors >=4.4.1,
product-profunctors >=0.6.3,
email-validate >=2.1.3 && <2.2,
optparse-applicative >=0.11.0.2,
-- to be removed
checker-sampler,
postgresql-libpq,
stm,
aeson,
-- not a dependency, but lock version for dep resolution's sake
attoparsec >= 0.12.1.6 && < 0.13
default-language: Haskell2010
default-extensions:
MultiParamTypeClasses,
NamedFieldPuns,
OverloadedLists,
OverloadedStrings,
PackageImports,
RecordWildCards,
ScopedTypeVariables,
StandaloneDeriving,
TupleSections,
TypeSynonymInstances,
FlexibleContexts,
DeriveGeneric
other-extensions:
DataKinds,
ExistentialQuantification,
RankNTypes,
TemplateHaskell,
TypeFamilies
exposed-modules:
Controller,
Database,
AvailabilityQueue
Connection,
Types,
Serialize.Callback
Serialize.Connection
Serialize.ConnState
other-modules:
Database.Opaleye
hs-source-dirs: src
ghc-options: -Wall -fno-warn-deprecations
ghc-prof-options: -Wall -fno-warn-deprecations -fprof-auto -rtsopts
executable master
build-depends:
databus,
base >=4.7.0.2 && <4.9,
lifted-base >=0.2.3 && <0.3,
bytestring >=0.10.6 && <0.11,
lifted-async >=0.7 && <0.8,
containers >=0.5.6,
checker-common >=0.2 && <0.3,
ixset-typed,
simple-logging,
lens >=4.9 && <4.10,
msgpack >=0.7 && <1.0,
monads-tf >=0.1.0.2 && <0.2,
uuid >=1.3.11 && <1.4,
ttrpc >=0.1 && <0.2,
stm >=2.4.4 && <2.5,
time >=1.5 && <1.6,
transformers-base >=0.4.4 && <0.5,
iproute >=1.4 && <1.5,
text >=1.2.1.0 && <1.3,
postgresql-simple >=0.4.1 && <0.5,
zeromq4-haskell >=0.6.3 && <0.7,
dns >=1.4.5 && <1.5,
network-uri >=2.6 && <2.7,
binary>=0.7.3 && <0.8,
directory >=1.2.2 && <1.3,
optparse-applicative >=0.11.0.2,
-- to be removed
checker-sampler,
postgresql-libpq,
stm,
aeson
main-is: Master.hs
hs-source-dirs: executables
default-language: Haskell2010
default-extensions:
MultiParamTypeClasses,
NamedFieldPuns,
OverloadedLists,
OverloadedStrings,
PackageImports,
RecordWildCards,
ScopedTypeVariables,
StandaloneDeriving,
TupleSections,
TypeSynonymInstances,
FlexibleContexts,
DeriveGeneric
other-extensions:
DataKinds,
ExistentialQuantification,
RankNTypes,
TemplateHaskell,
TypeFamilies
ghc-options: -Wall -threaded -fno-warn-deprecations
ghc-prof-options: -Wall -fno-warn-deprecations -threaded -fprof-auto -rtsopts
executable crawler-controller
build-depends:
databus,
base >=4.7.0.2 && <4.9,
containers >=0.5.6,
ixset-typed,
split >=0.2.2 && <0.3,
ttrpc >=0.1 && <0.2,
text >=1.2.1.0 && <1.3,
postgresql-simple >=0.4.1 && <0.5
main-is: CrawlerController.hs
hs-source-dirs: executables
default-language: Haskell2010
default-extensions:
MultiParamTypeClasses,
NamedFieldPuns,
OverloadedStrings,
TupleSections,
PackageImports,
StandaloneDeriving,
RecordWildCards,
OverloadedLists,
ScopedTypeVariables,
FlexibleContexts,
DeriveGeneric
other-extensions:
TemplateHaskell,
ExistentialQuantification,
RankNTypes,
TypeFamilies,
DataKinds
ghc-options: -Wall -threaded -fno-warn-deprecations
ghc-prof-options: -Wall -fno-warn-deprecations -threaded -fprof-auto -rtsopts
executable sampler-controller
build-depends:
databus,
base >=4.7.0.2 && <4.9,
checker-sampler,
containers >=0.5.6,
ixset-typed,
ttrpc >=0.1 && <0.2,
text >=1.2.1.0 && <1.3,
random >=1.0.1.1 && <1.2,
postgresql-simple >=0.4.1 && <0.5
main-is: SamplerController.hs
hs-source-dirs: executables
default-language: Haskell2010
default-extensions:
MultiParamTypeClasses,
NamedFieldPuns,
OverloadedStrings,
TupleSections,
PackageImports,
StandaloneDeriving,
RecordWildCards,
OverloadedLists,
ScopedTypeVariables,
FlexibleContexts,
DeriveGeneric
other-extensions:
TemplateHaskell,
ExistentialQuantification,
RankNTypes,
TypeFamilies,
DataKinds
ghc-options: -Wall -threaded -fno-warn-deprecations
ghc-prof-options: -Wall -fno-warn-deprecations -threaded -fprof-auto -rtsopts
executable webpage-wam-controller
build-depends:
databus,
base >=4.7.0.2 && <4.9,
lifted-base >=0.2.3 && <0.3,
containers >=0.5.6,
ixset-typed,
msgpack >=0.7 && <1.0,
ttrpc >=0.1 && <0.2,
text >=1.2.1.0 && <1.3,
random >=1.0.1.1 && <1.2,
postgresql-simple >=0.4.1 && <0.5
main-is: WebpageWamController.hs
hs-source-dirs: executables
default-language: Haskell2010
default-extensions:
MultiParamTypeClasses,
NamedFieldPuns,
OverloadedStrings,
TupleSections,
PackageImports,
StandaloneDeriving,
RecordWildCards,
OverloadedLists,
ScopedTypeVariables,
FlexibleContexts,
DeriveGeneric
other-extensions:
TemplateHaskell,
ExistentialQuantification,
RankNTypes,
TypeFamilies,
DataKinds
ghc-options: -Wall -threaded -fno-warn-deprecations
ghc-prof-options: -Wall -fno-warn-deprecations -threaded -fprof-auto -rtsopts
executable checkerctl
build-depends:
base >=4.7.0.2 && <4.9,
lifted-async >=0.7 && <0.8,
aeson >=0.8.0.2 && <0.9,
bytestring >=0.10.4 && <0.11,
checker-common >=0.2 && <0.3,
containers >=0.5.5.1 && <0.6,
directory >=1.2.1 && <1.3,
filepath >=1.3.0.2 && <1.5,
msgpack >=0.7.2.5 && <0.8,
network-uri >=2.6 && <2.7,
optparse-applicative >=0.11.0.1 && <0.12,
random >=1.0.1.1 && <1.2,
stm >=2.4.4 && <2.5,
text >=1.2.1.0 && <1.3,
time >=1.4.2 && <1.6,
transformers >=0.3 && <0.5,
transformers-base >=0.4.4 && <0.5,
ttrpc >=0.1 && <0.2,
uuid >=1.3.11 && <1.4,
yaml >=0.8.10.1 && <0.9,
zeromq4-haskell >=0.6.3 && <0.7,
monads-tf >=0.1.0.2 && <0.2
main-is: Ctl.hs
hs-source-dirs: executables
default-language: Haskell2010
default-extensions:
MultiParamTypeClasses,
NamedFieldPuns,
TupleSections,
PackageImports,
StandaloneDeriving,
RecordWildCards,
ScopedTypeVariables,
FlexibleContexts,
DeriveGeneric
other-extensions:
TemplateHaskell,
ExistentialQuantification,
RankNTypes,
TypeFamilies,
DataKinds
ghc-options: -Wall -fno-warn-deprecations -threaded
ghc-prof-options: -Wall -fno-warn-deprecations -threaded -fprof-auto -rtsopts
executable httpctl
build-depends:
base >=4.7.0.2 && <4.9,
bytestring >=0.10.4 && <0.11,
checker-common >=0.2 && <0.3,
containers >=0.5.5.1 && <0.6,
msgpack >=0.7.2.5 && <0.8,
network-uri >=2.6 && <2.7,
text >=1.2.1.0 && <1.3,
ttrpc >=0.1 && <0.2,
uuid >=1.3.11 && <1.4,
zeromq4-haskell >=0.6.3 && <0.7,
transformers >=0.3 && <0.5,
transformers-base >=0.4.4 && <0.5,
mtl >=2.2.1 && <2.3,
http-types >=0.8.6 && <0.9,
email-validate >=2.1.3 && <2.2,
optparse-applicative >=0.11.0.2,
databus,
wai-extra,
aeson,
simple-logging,
scotty
main-is: HttpCtl.hs
hs-source-dirs: executables
default-language: Haskell2010
default-extensions:
MultiParamTypeClasses,
NamedFieldPuns,
TupleSections,
PackageImports,
StandaloneDeriving,
RecordWildCards,
ScopedTypeVariables,
FlexibleContexts,
OverloadedStrings,
DeriveGeneric
other-extensions:
TemplateHaskell,
ExistentialQuantification,
RankNTypes,
TypeFamilies,
DataKinds
ghc-options: -Wall -fno-warn-deprecations -threaded
ghc-prof-options: -Wall -fno-warn-deprecations -threaded -fprof-auto -rtsopts
{-# LANGUAGE FlexibleContexts #-}
module Main where
import Connection
import Control.Applicative (empty)
import Control.Concurrent
import Control.Monad
import Data.Either (partitionEithers)
import qualified Data.IxSet.Typed as Ix
import Data.List.Split (chunksOf)
import Data.Monoid ((<>))
import Data.Set (Set)
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Database as DB
import qualified Database.PostgreSQL.Simple as PSQL
import Controller hiding (State ())
import qualified Controller
import Network.TTRPC hiding (asyncCall, connection)
import qualified Network.TTRPC as RPC
type State = Controller.State CrawlerResult
main :: IO ()
main = parseOptions "tcp://127.0.0.1:9001" >>= runWithOptions
runWithOptions :: ControllerOptions -> IO ()
runWithOptions ControllerOptions{listenAddress,masterAddress,psqlConnString} =
withLogging ("logs/" ++ T.unpack myname ++ ".log") $ do
state@State{..} <- mkState myname listenAddress psqlConnString masterAddress
void . forkIO $ processResults state
void . forkIO $ crawlSites state
void . forkIO $ updateLoads state 5
void . forkIO $ register state listenAddress
void . forkIO $ todoWatcher state 50
controller state sScr sCtx 10 $
serve "notify" (crawl state)
<> serve "crawl" (crawl state)
<> serve "kill" (kill state)
where myname = "crawler-controller"
-- ** Server methods
crawl :: State -> Set UUID -> Noreply (IO ())
crawl state@State{sPsql} uuids = return $ do
logDebug "crawl"
let uuids' = S.toList uuids
siteNames' <- withMVar sPsql $
flip DB.getSiteUris uuids'
forM_ (chunksOf 10 siteNames') $ \siteNames -> do
(noResolve, resolved) <- partitionEithers <$> resolveUris siteNames
addTodo state (Ix.fromList resolved)
forM_ noResolve $ \(uuid', addr) -> do
let e = "Could not resolve " ++ show addr
logWarn $ show uuid' ++ " - " ++ e
kill :: State -> UUID -> RPC () ()
kill state@State{sTodo,sPsql} uuid = liftBase $ do
_killed <- removeTodo sPsql sTodo uuid
killJobUuid state uuid
assumedCrawlerLoad :: Int
assumedCrawlerLoad = 100
-- ** Crawling
crawlSites :: State -> IO ()
crawlSites state@State{..} = forever' "crawlSites" $ do
-- 1. pull a site-uuid from 'sTodo'
site@Site{uuid} <- getNewTask state
-- 2. get the crawler rules from the DB.
crawlerRules <- (withMVar sPsql $ flip DB.getCrawlerRules uuid)
>>= maybe empty return
-- 3. get a free crawler
crawler <- atomically $ pickConn sConnections "crawler" assumedCrawlerLoad
-- 4. submit crawl -- this migth fail, in which case we
-- free up the crawler again and stop processing.
crawlerJob <- submitCrawl sScr crawler crawlerRules site
>>= maybe empty return
-- 5. add asyncjob to set of asyncCalls.
addJob state crawlerJob
-- 6. try to remove the siteUuid from sTodo.
-- - if it fails, the task has been removed (killed) already, so
-- kill the crawl and remove the associated asyncCall.
removed <- removeTodo sPsql sTodo uuid
unless removed .
liftBase $ killJob state crawlerJob
submitCrawl :: MonadBase IO m
=> ServerCallbacksRef
-> ConnState
-> CrawlerRules
-> Site
-> m (Maybe (Callback CrawlerResult))
submitCrawl scr crawler rules site@Site{uuid} = do
euuid <- runErrorT . call $ methodCrawl (crawler^.csConn) rules
case euuid of
Left e -> do liftBase . logErr $ show uuid ++ " - Failed to start the crawl: " ++ show e
return Nothing
Right jobUuid -> do
ej <- runErrorT . RPC.asyncCall scr $ methodPoll (crawler^.csConn) jobUuid
case ej of
Right j -> do liftBase . logDebug $ show uuid ++ " - Successfully started crawl"
return . Just $ Callback j site crawler Nothing
Left e -> do liftBase . logErr $ show uuid ++ " - Failed to start polling for crawl results: " ++ show e
return Nothing
methodPoll :: Conn
-> String -- ^ UUID of the job to wait for
-> Method String (CrawlerResult,Int)
methodPoll = method "poll"
methodCrawl :: Conn
-> CrawlerRules
-> Method () String
methodCrawl = method "crawl"
processResults :: State -> IO ()
processResults state@State{sResultsQueue} = forever' "processResults" $ do
r' <- (atomically $ readTQueue sResultsQueue)
case r' of
(uuid', Right r) -> handleFinished state uuid' r
(uuid', Left e) -> handleError state uuid' e
seconds :: Int
seconds = 1000000
-- Handle the result of a job
handleFinished :: State -> UUID -> CrawlerResult -> IO ()
handleFinished state@State{sMaster,sPsql} uuid result = do
logDebug $ show uuid ++ " - Got job result."
processResult sPsql uuid result
notify sMaster uuid
`finally` (void $ removeJobUuid state uuid)
-- | Store the result to the DB.
processResult :: MVar PSQL.Connection -> UUID -> CrawlerResult -> IO ()
processResult psql siteUuid crawlResult = do
withMVar psql $ \psql' ->
DB.updateCrawlerResult psql' siteUuid crawlResult
logDebug $ show siteUuid ++ " - Stored crawl result."
-- | Notify master about a completed job.
notify :: Conn -> UUID -> IO ()
notify conn = method "notify" conn ("crawl-complete"::Text)
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE CPP #-}
module Main (main) where
import Control.Concurrent (forkIO, threadDelay)
import Control.Concurrent.Async.Lifted
import Control.Monad
import "monads-tf" Control.Monad.Error (runErrorT)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BSL
import qualified Data.Aeson as Json
import Data.Either
#if MIN_VERSION_base(4,8,0)
#else
import Data.Foldable (foldMap)
#endif
import Data.List (intercalate)
import Data.Map (Map)
import qualified Data.Map as M
import Data.MessagePack (OBJECT, Object)
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as T
import Data.UUID (UUID)
import qualified Data.Yaml.Aeson as Yaml
import Network.URI (URI)
import qualified Network.URI as URI
import Options.Applicative
import System.Random (randomRIO)
import qualified System.ZMQ4 as Z
import Network.TTRPC
import Web.Checker.Common
ping :: Conn -> IO String
ping = method "ping"
load :: Conn -> IO Int
load = method "load"
ping' :: Conn -> IO ()
ping' = putStrLn <=< ping
createTestrun :: Conn
-> TestrunRules
-> Map String (Maybe SiteName, CrawlerRules, SamplerRules, CheckerRules)
-> Maybe Text -- owner
-> IO String
createTestrun = method "create-testrun"
startTestrun :: Conn
-> TestrunRules
-> Map String (Maybe SiteName, CrawlerRules, SamplerRules, CheckerRules)
-> Maybe Text
-> IO String
startTestrun = method "start-testrun"
createUser :: Conn -> Text -> Maybe Text -> IO UUID
createUser = method "create-user"
startStoredTestrun :: Conn
-> UUID
-> Maybe Text
-> IO UUID
startStoredTestrun = method "start-stored-testrun"
startSiteCheck :: Conn
-> URI
-> IO UUID
startSiteCheck = method "start-site-check"
rerunSite,rerunCrawledSite,rerunSampledSite
:: Conn
-> UUID
-> String
-> IO String
rerunSite = method "rerun-site"
rerunCrawledSite = method "rerun-crawled-site"
rerunSampledSite = method "rerun-sampled-site"
rerunTestrun :: Conn -> UUID -> IO [String]
rerunTestrun = method "rerun-testrun"
-- | Start a check on the databus.
check :: Conn
-> ToolId
-> URI.URI
-> Method String Object
check = method "check"
-- | Start a check on the wam.
check' :: Conn
-> Text
-> Method String Object
check' c uri = method "check" c ("webpage-wam" :: ToolId) uri
getPageResult :: Conn -> UUID -> IO (PageResult Object)
getPageResult = method "get-page-result"
addComponent :: Conn -> ToolId -> URI -> IO ()
addComponent = method "add-component"
addController :: Conn -> Text -> URI -> IO ()
addController = method "add-controller"
lsCrawlers,lsProxies