Merge pull request #379 from abf/rosa-build:376-add-monitoring-tool

#376: Monitoring tool
This commit is contained in:
avm 2014-04-18 12:58:34 +04:00
commit 9e55a362d9
29 changed files with 390 additions and 82 deletions

View File

@ -0,0 +1,91 @@
# ActiveAdmin resource for NodeInstruction records.
#
# Offers filters, index/show/form pages, sidebar links that fire
# state-machine events on a single instruction, and a global
# lock/unlock switch for all instructions.
ActiveAdmin.register NodeInstruction do
  # Events that may be fired through the `force` member action. This is a
  # whitelist: request parameters must never be able to invoke arbitrary
  # methods on the record.
  force_events = %w(disable ready restart restart_failed).freeze

  menu priority: 3

  controller do
    # Loads the associated user together with each listed instruction.
    def scoped_collection
      NodeInstruction.includes(:user)
    end
  end

  filter :user_uname, as: :string
  filter :status, as: :select, collection: NodeInstruction::STATUSES
  filter :updated_at

  index do
    column :id
    column :user
    column(:status, sortable: :status) do |ni|
      status_tag(ni.status, status_color(ni))
    end
    column :updated_at
    default_actions
  end

  form do |f|
    f.inputs do
      f.input :user, as: :select, include_blank: false, collection: User.system.map { |u| [u.uname, u.id] }
      f.input :status, as: :select, include_blank: false, collection: NodeInstruction::STATUSES
      f.input :instruction, as: :text
    end
    f.actions
  end

  show do
    attributes_table do
      row :id
      row :user
      row(:status, sortable: :status) do |ni|
        status_tag(ni.status, status_color(ni))
      end
      row :created_at
      row :updated_at
      row :instruction
      row(:output) do |ni|
        # `output` is raw stdout/stderr of executed commands: escape every line
        # before joining with <br/> and marking the result as safe markup.
        ni.output.to_s.lines.map { |line| ERB::Util.html_escape(line) }.join('<br/>').html_safe
      end
    end
  end

  # Per-record shortcuts: one link for each event the record can currently fire.
  sidebar 'Actions', only: :show do
    force_events.each do |state|
      div do
        link_to state.humanize, force_admin_node_instruction_path(resource, state: state), method: :patch
      end if resource.send("can_#{state}?")
    end
  end

  # Global switch: shows the current lock state and a link to toggle it.
  sidebar 'Actions', only: :index do
    locked = NodeInstruction.all_locked?
    span(class: "status_tag #{locked ? 'red' : 'green'}") do
      if locked
        link_to 'Unlock instructions', unlock_all_admin_node_instructions_path, method: :post
      else
        link_to 'Lock instructions', lock_all_admin_node_instructions_path, method: :post
      end
    end
  end

  collection_action :lock_all, method: :post do
    NodeInstruction.lock_all
    flash[:notice] = 'Locked successfully'
    redirect_to admin_node_instructions_path
  end

  collection_action :unlock_all, method: :post do
    NodeInstruction.unlock_all
    flash[:notice] = 'Unlocked successfully'
    redirect_to admin_node_instructions_path
  end

  # Fires the state-machine event named by params[:state] on the record.
  # Only whitelisted events are accepted, and success is reported only when
  # the event call returns a truthy value.
  member_action :force, method: :patch do
    state = params[:state].to_s
    if force_events.include?(state) && resource.public_send(state)
      flash[:notice] = 'Updated successfully'
    else
      flash[:error] = 'Unable to update'
    end
    redirect_to admin_node_instruction_path(resource)
  end
end

View File

@ -0,0 +1,12 @@
# Picks the status_tag colour for a node instruction.
#
# Checks are made in order: ready -> :green, disabled -> nil (no colour),
# failed -> :red; anything else -> :orange.
def status_color(ni)
  return :green if ni.ready?
  return nil if ni.disabled?

  ni.failed? ? :red : :orange
end

View File

@ -9,7 +9,9 @@ class Api::V1::JobsController < Api::V1::BaseController
def shift
@build_list = BuildList.next_build if current_user.system?
unless @build_list
if @build_list
set_builder
else
platform_ids = Platform.where(name: params[:platforms].split(',')).pluck(:id) if params[:platforms].present?
arch_ids = Arch.where(name: params[:arches].split(',')).pluck(:id) if params[:arches].present?
build_lists = BuildList.for_status(BuildList::BUILD_PENDING).scoped_to_arch(arch_ids).
@ -19,36 +21,29 @@ class Api::V1::JobsController < Api::V1::BaseController
ActiveRecord::Base.transaction do
if current_user.system?
@build_list ||= build_lists.external_nodes(:everything).first
@build_list.touch if @build_list
else
@build_list = build_lists.external_nodes(:owned).for_user(current_user).first
@build_list ||= build_lists.external_nodes(:everything).
accessible_by(current_ability, :everything).readonly(false).first
if @build_list
@build_list.builder = current_user
@build_list.save
end
set_builder
end
end
end
end
job = {
worker_queue: @build_list.worker_queue_with_priority(false),
worker_class: @build_list.worker_queue_class,
:worker_args => [@build_list.abf_worker_args]
} if @build_list
render json: { job: job }.to_json
end
def statistics
if params[:uid].present?
RpmBuildNode.create(
:id => params[:uid],
:user_id => current_user.id,
:system => current_user.system?,
id: params[:uid],
user_id: current_user.id,
system: current_user.system?,
worker_count: params[:worker_count],
busy_workers: params[:busy_workers]
) rescue nil
@ -86,4 +81,12 @@ class Api::V1::JobsController < Api::V1::BaseController
end
end
protected
# Records the current user as the builder of @build_list and saves it.
# Does nothing (returns nil) when no build list has been selected.
def set_builder
  if @build_list
    @build_list.builder = current_user
    @build_list.save
  end
end
end

View File

@ -1,5 +1,5 @@
class BuildListsPublishTaskManagerJob
@queue = :hook
@queue = :middle
def self.perform
AbfWorker::BuildListsPublishTaskManager.new.run

View File

@ -1,5 +1,5 @@
class BuildListsQueuesMonitoringJob
@queue = :hook
@queue = :middle
def self.perform
Redis.current.smembers('resque:queues').each do |key|

View File

@ -1,5 +1,5 @@
class CleanApiDefenderStatisticsJob
@queue = :clone_build
@queue = :low
def self.perform
deadline = Date.today - 1.month

View File

@ -1,5 +1,5 @@
class CleanRpmBuildNodeJob
@queue = :hook
@queue = :middle
def self.perform
RpmBuildNode.all.each do |n|

View File

@ -0,0 +1,11 @@
# Job that triggers `restart` on READY node instructions whose user has no
# registered build node owned by a system user.
class RestartNodesJob
  @queue = :low

  # Skips all work while instructions are globally locked.
  def self.perform
    return if NodeInstruction.all_locked?

    # User ids that currently have a build node registered by a system user.
    system_user_ids = RpmBuildNode.all.map do |node|
      node.user_id if node.user.try(:system?)
    end
    system_user_ids = system_user_ids.compact.uniq

    ready_instructions = NodeInstruction.where(status: NodeInstruction::READY)
    ready_instructions.where.not(user_id: system_user_ids).find_each(&:restart)
  end
end

View File

@ -259,8 +259,8 @@ class BuildList < ActiveRecord::Base
end
end
later :publish, queue: :clone_build
later :add_job_to_abf_worker_queue, queue: :clone_build
later :publish, queue: :middle
later :add_job_to_abf_worker_queue, queue: :middle
HUMAN_CONTAINER_STATUSES = { WAITING_FOR_RESPONSE => :waiting_for_publish,
BUILD_PUBLISHED => :container_published,
@ -576,7 +576,7 @@ class BuildList < ActiveRecord::Base
# Restarts the job only when the build list status is BUILD_PENDING;
# any arguments are accepted and ignored.
def delayed_add_job_to_abf_worker_queue(*args)
  return unless status == BUILD_PENDING

  restart_job
end
later :delayed_add_job_to_abf_worker_queue, delay: 60, queue: :clone_build
later :delayed_add_job_to_abf_worker_queue, delay: 60, queue: :middle
protected

View File

@ -6,7 +6,7 @@ module FileStoreClean
destroy_files_from_file_store if Rails.env.production?
super
end
later :destroy, queue: :clone_build
later :destroy, queue: :middle
def sha1_of_file_store_files
raise NotImplementedError, "You should implement this method"
@ -31,7 +31,7 @@ module FileStoreClean
# Thin wrapper that forwards `args` unchanged to destroy_files_from_file_store.
# It is the method registered with `later` on the line that follows.
def later_destroy_files_from_file_store(args)
destroy_files_from_file_store(args)
end
later :later_destroy_files_from_file_store, queue: :clone_build
later :later_destroy_files_from_file_store, queue: :middle
end
def self.file_exist_on_file_store?(sha1)

View File

@ -59,7 +59,7 @@ class MassBuild < ActiveRecord::Base
end
end
end
later :build_all, queue: :clone_build
later :build_all, queue: :low
def generate_failed_builds_list
generate_list BuildList::BUILD_ERROR
@ -75,17 +75,17 @@ class MassBuild < ActiveRecord::Base
bl.cancel
end
end
later :cancel_all, queue: :clone_build
later :cancel_all, queue: :low
# Calls `publish` for `user` with the SUCCESS and FAILED_PUBLISH build statuses.
def publish_success_builds(user)
  statuses = [BuildList::SUCCESS, BuildList::FAILED_PUBLISH]
  publish(user, *statuses)
end
later :publish_success_builds, queue: :clone_build
later :publish_success_builds, queue: :low
# Calls `publish` for `user` with the TESTS_FAILED build status.
def publish_test_failed_builds(user)
  publish(user, BuildList::TESTS_FAILED)
end
later :publish_test_failed_builds, queue: :clone_build
later :publish_test_failed_builds, queue: :low
COUNT_STATUSES.each do |stat|
stat_count = "#{stat}_count"

View File

@ -0,0 +1,88 @@
# A multi-line shell instruction attached to a user, together with the output
# of its last run and a state-machine-driven status.
#
# A global lock flag stored in Redis (LOCK_KEY) is exposed through
# all_locked? / lock_all / unlock_all.
class NodeInstruction < ActiveRecord::Base
  STATUSES = [
    DISABLED    = 'disabled',
    READY       = 'ready',
    RESTARTING  = 'restarting',
    FAILED      = 'failed'
  ]

  LOCK_KEY = 'NodeInstruction::lock-key'

  belongs_to :user

  # Other non-disabled instructions of the same user (excluding record `id`).
  scope :duplicate, -> id, user_id {
    where.not(id: id.to_i).where(user_id: user_id, status: STATUSES - [DISABLED])
  }

  attr_encrypted :instruction, key: APP_CONFIG['keys']['node_instruction_secret_key']

  validates :user, presence: true
  validates :instruction, presence: true, length: { maximum: 10000 }
  validates :status, presence: true
  # At most one non-disabled instruction per user.
  validate -> {
    errors.add(:status, 'Can be only single active instruction for each node') if !disabled? && NodeInstruction.duplicate(id.to_i, user_id).exists?
  }

  attr_accessible :instruction, :user_id, :output, :status

  state_machine :status, initial: :ready do
    after_transition(on: :restart) do |instruction, transition|
      instruction.perform_restart
    end

    event :ready do
      transition %i(ready restarting disabled failed) => :ready
    end

    event :disable do
      transition ready: :disabled
    end

    event :restart do
      transition ready: :restarting
    end

    event :restart_failed do
      transition restarting: :failed
    end
  end

  # Runs every non-blank line of `instruction` through the shell, stores the
  # combined stdout/stderr in `output`, resets this user's started build lists
  # back to pending, and finally fires `ready` or `restart_failed`.
  #
  # NOTE: `success` reflects the exit status of the LAST executed command only.
  # NOTE(security): lines are executed verbatim by the shell; instructions must
  # only ever be editable by trusted administrators.
  def perform_restart
    # Globally locked: mark the attempt as failed and stop here. Without the
    # early return the commands below would still run and the final `ready`
    # event would override the failure, making the lock ineffective.
    return restart_failed if NodeInstruction.all_locked?

    success = false
    stdout  = ''
    instruction.lines.each do |command|
      next if command.blank?
      command.chomp!; command.strip!
      stdout << %x[ #{command} 2>&1 ]
      success = $?.success?
    end

    # Re-queue builds that were running on this user's node.
    build_lists = BuildList.where(builder_id: user_id, external_nodes: [nil, '']).
      for_status(BuildList::BUILD_STARTED)
    build_lists.find_each do |bl|
      bl.update_column(:status, BuildList::BUILD_PENDING)
      bl.restart_job
    end

    update_column(:output, stdout)
    success ? ready : restart_failed
  end
  later :perform_restart, queue: :low

  # True when the global lock key holds a value in Redis.
  def self.all_locked?
    Redis.current.get(LOCK_KEY).present?
  end

  # Sets the global lock key.
  def self.lock_all
    Redis.current.set(LOCK_KEY, 1)
  end

  # Removes the global lock key.
  def self.unlock_all
    Redis.current.del(LOCK_KEY)
  end
end

View File

@ -205,7 +205,7 @@ class Platform < ActiveRecord::Base
# Destroys the record by calling the inherited `destroy` inside `with_skip`.
# NOTE(review): `with_skip` is defined outside this view; per the inline
# comment it suppresses cascading XML RPC requests — confirm.
def destroy
with_skip {super} # avoid cascade XML RPC requests
end
later :destroy, queue: :clone_build
later :destroy, queue: :low
def default_host
EventLog.current_controller.request.host_with_port rescue ::Rosa::Application.config.action_mailer.default_url_options[:host]
@ -274,7 +274,7 @@ class Platform < ActiveRecord::Base
# Recursively copies the parent's "repository" directory to this object's path.
# `old_name` and `new_name` are accepted but not used by the body.
def fs_clone(old_name = parent.name, new_name = name)
  source_dir = "#{parent.path}/repository"
  FileUtils.cp_r(source_dir, path)
end
later :fs_clone, queue: :clone_build
later :fs_clone, queue: :low
def freeze_platform_and_update_repos
if released_changed? && released == true

View File

@ -109,7 +109,7 @@ class Project < ActiveRecord::Base
end
def init_mass_import
Project.perform_later :clone_build, :run_mass_import, url, srpms_list, visibility, owner, add_to_repository_id
Project.perform_later :low, :run_mass_import, url, srpms_list, visibility, owner, add_to_repository_id
end
def name_with_owner
@ -417,7 +417,7 @@ class Project < ActiveRecord::Base
PullRequest.where(from_project_id: id).each{ |p| p.update_relations(old_name) }
pull_requests.where('from_project_id != to_project_id').each(&:update_relations)
end
later :update_path_to_project, queue: :clone_build
later :update_path_to_project, queue: :middle
def check_default_branch
if self.repo.branches.count > 0 && self.repo.branches.map(&:name).exclude?(self.default_branch)

View File

@ -57,7 +57,7 @@ class PullRequest < ActiveRecord::Base
system 'git', 'remote', 'set-url', 'head', from_project.path if cross_pull?
end
end
later :update_relations, queue: :clone_build
later :update_relations, queue: :middle
def cross_pull?
from_project_id != to_project_id

View File

@ -59,8 +59,7 @@ class Repository < ActiveRecord::Base
from.projects.find_each {|p| self.projects << p}
end
end
later :clone_relations, loner: true, queue: :clone_build
later :clone_relations, loner: true, queue: :low
def add_projects(list, user)
current_ability = Ability.new(user)
list.lines.each do |line|
@ -75,7 +74,7 @@ class Repository < ActiveRecord::Base
end
end
end
later :add_projects, queue: :clone_build
later :add_projects, queue: :middle
def remove_projects(list)
list.lines.each do |name|
@ -87,7 +86,7 @@ class Repository < ActiveRecord::Base
end
end
end
later :remove_projects, queue: :clone_build
later :remove_projects, queue: :middle
def full_clone(attrs = {})
base_clone(attrs).tap do |c|
@ -148,7 +147,7 @@ class Repository < ActiveRecord::Base
# Destroys the record by calling the inherited `destroy` inside `with_skip`.
# NOTE(review): `with_skip` is defined outside this view; per the inline
# comment it suppresses cascading XML RPC requests — confirm.
def destroy
with_skip {super} # avoid cascade XML RPC requests
end
later :destroy, queue: :clone_build
later :destroy, queue: :low
def self.custom_sort(repos)
repos.select{ |r| SORT.keys.include?(r.name) }.sort{ |a,b| SORT[a.name] <=> SORT[b.name] } | repos.sort_by(&:name)

View File

@ -31,7 +31,7 @@ json.build_list do
json.builder do
json.fullname @build_list.builder.try(:fullname)
json.path user_path(@build_list.builder)
end if @build_list.builder
end if @build_list.builder && (!@build_list.builder.system? || current_user.admin?)
json.advisory do
json.(@build_list.advisory, :description, :advisory_id)

View File

@ -14,6 +14,7 @@ common: &common
port: 6379
keys:
key_pair_secret_key: 'key_pair_secret_key'
node_instruction_secret_key: 'node_instruction_secret_key'
airbrake_api_key: 'airbrake_api_key'
logentries_key: 'logentries_key'
secret_token: 'secret_token'

View File

@ -14,6 +14,7 @@ common: &common
port: 6379
keys:
key_pair_secret_key: 'key_pair_secret_key'
node_instruction_secret_key: 'node_instruction_secret_key'
airbrake_api_key: 'airbrake_api_key'
devise_pepper: 'e295a79fb7966e94a6e8b184ba65791a'
secret_token: 'e295a79fb7966e94a6e8b184ba65791a'

View File

@ -2,26 +2,33 @@ clean_rpm_build_nodes:
every:
- '1m'
class: 'CleanRpmBuildNodeJob'
queue: hook
queue: middle
description: 'Cleans RPM build nodes'
build_lists_publish_task_manager:
every:
- '3m'
class: 'BuildListsPublishTaskManagerJob'
queue: hook
queue: middle
description: 'Creates tasks for publishing'
build_lists_queues_monitoring:
every:
- '1m'
class: 'BuildListsQueuesMonitoringJob'
queue: hook
queue: middle
description: 'Monitoring for "user/mass-build" queues'
clean_api_defender_statistics:
every:
- '1d'
class: 'CleanApiDefenderStatisticsJob'
queue: clone_build
queue: low
description: 'Cleans ApiDefender statistics'
restart_nodes:
every:
- '5m'
class: 'RestartNodesJob'
queue: low
description: 'Restarts unavailable nodes'

View File

@ -0,0 +1,12 @@
# Creates the node_instructions table.
class CreateNodeInstructions < ActiveRecord::Migration
# Reversible migration: only a create_table is declared.
def change
create_table :node_instructions do |t|
# Required reference to the owning user.
t.integer :user_id, null: false
# Required text column; presumably holds the ciphertext for the model's
# `attr_encrypted :instruction` — confirm against the model.
t.text :encrypted_instruction, null: false
# Optional free-text output.
t.text :output
# Status string; no default or NOT NULL constraint at the database level.
t.string :status
t.timestamps
end
end
end

View File

@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20140407181059) do
ActiveRecord::Schema.define(version: 20140414195426) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@ -287,6 +287,56 @@ ActiveRecord::Schema.define(version: 20140407181059) do
t.boolean "increase_release_tag", default: false, null: false
end
create_table "users", force: true do |t|
t.string "name"
t.string "email", default: "", null: false
t.string "encrypted_password", limit: 128, default: "", null: false
t.string "reset_password_token"
t.datetime "reset_password_sent_at"
t.datetime "remember_created_at"
t.datetime "created_at"
t.datetime "updated_at"
t.text "ssh_key"
t.string "uname"
t.string "role"
t.string "language", default: "en"
t.integer "own_projects_count", default: 0, null: false
t.text "professional_experience"
t.string "site"
t.string "company"
t.string "location"
t.string "avatar_file_name"
t.string "avatar_content_type"
t.integer "avatar_file_size"
t.datetime "avatar_updated_at"
t.integer "failed_attempts", default: 0
t.string "unlock_token"
t.datetime "locked_at"
t.string "confirmation_token"
t.datetime "confirmed_at"
t.datetime "confirmation_sent_at"
t.string "authentication_token"
t.integer "build_priority", default: 50
t.boolean "sound_notifications", default: true
t.index ["authentication_token"], :name => "index_users_on_authentication_token"
t.index ["confirmation_token"], :name => "index_users_on_confirmation_token", :unique => true
t.index ["email"], :name => "index_users_on_email", :unique => true
t.index ["reset_password_token"], :name => "index_users_on_reset_password_token", :unique => true
t.index ["uname"], :name => "index_users_on_uname", :unique => true
t.index ["unlock_token"], :name => "index_users_on_unlock_token", :unique => true
end
create_table "node_instructions", force: true do |t|
t.integer "user_id", null: false
t.text "encrypted_instruction", null: false
t.text "output"
t.string "status"
t.datetime "created_at"
t.datetime "updated_at"
t.index ["user_id"], :name => "fk__node_instructions_user_id"
t.foreign_key ["user_id"], "users", ["id"], :on_update => :no_action, :on_delete => :no_action, :name => "fk_node_instructions_user_id"
end
create_table "platform_arch_settings", force: true do |t|
t.integer "platform_id", null: false
t.integer "arch_id", null: false
@ -530,43 +580,4 @@ ActiveRecord::Schema.define(version: 20140407181059) do
t.index ["subject_id", "subject_type"], :name => "index_tokens_on_subject_id_and_subject_type"
end
create_table "users", force: true do |t|
t.string "name"
t.string "email", default: "", null: false
t.string "encrypted_password", limit: 128, default: "", null: false
t.string "reset_password_token"
t.datetime "reset_password_sent_at"
t.datetime "remember_created_at"
t.datetime "created_at"
t.datetime "updated_at"
t.text "ssh_key"
t.string "uname"
t.string "role"
t.string "language", default: "en"
t.integer "own_projects_count", default: 0, null: false
t.text "professional_experience"
t.string "site"
t.string "company"
t.string "location"
t.string "avatar_file_name"
t.string "avatar_content_type"
t.integer "avatar_file_size"
t.datetime "avatar_updated_at"
t.integer "failed_attempts", default: 0
t.string "unlock_token"
t.datetime "locked_at"
t.string "confirmation_token"
t.datetime "confirmed_at"
t.datetime "confirmation_sent_at"
t.string "authentication_token"
t.integer "build_priority", default: 50
t.boolean "sound_notifications", default: true
t.index ["authentication_token"], :name => "index_users_on_authentication_token"
t.index ["confirmation_token"], :name => "index_users_on_confirmation_token", :unique => true
t.index ["email"], :name => "index_users_on_email", :unique => true
t.index ["reset_password_token"], :name => "index_users_on_reset_password_token", :unique => true
t.index ["uname"], :name => "index_users_on_uname", :unique => true
t.index ["unlock_token"], :name => "index_users_on_unlock_token", :unique => true
end
end

View File

@ -32,9 +32,11 @@ Capistrano::Configuration.instance(:must_exist).load do
:fork_import,
:hook,
:clone_build,
:middle,
:notification
].join(',')
run "cd #{fetch :current_path} && COUNT=#{workers_count} QUEUE=#{queue} #{rails_env} BACKGROUND=yes bundle exec rake resque:workers"
run "cd #{fetch :current_path} && COUNT=#{workers_count - 1} QUEUE=#{queue} INTERVAL=0.1 #{rails_env} BACKGROUND=yes bundle exec rake resque:workers"
run "cd #{fetch :current_path} && COUNT=1 QUEUE=low #{rails_env} BACKGROUND=yes bundle exec rake resque:workers"
end
def remote_file_exists?(full_path)

View File

@ -0,0 +1,5 @@
require 'spec_helper'
# Runs the shared 'an admin controller' example group against
# Admin::NodeInstructionsController (the shared examples are defined elsewhere).
describe Admin::NodeInstructionsController do
it_should_behave_like 'an admin controller'
end

View File

@ -0,0 +1,6 @@
# Factory for NodeInstruction records.
FactoryGirl.define do
factory :node_instruction do
# The owner is built with the :system_user factory.
association :user, factory: :system_user
# Instruction text comes from the :string sequence (defined elsewhere).
instruction { FactoryGirl.generate(:string) }
end
end

View File

@ -0,0 +1,6 @@
# Factory for RpmBuildNode records.
FactoryGirl.define do
factory :rpm_build_node do
# Node id comes from the :string sequence (defined elsewhere).
id { FactoryGirl.generate(:string) }
# Creates a :user record and stores its id; callers may override user_id.
user_id { FactoryGirl.create(:user).id }
end
end

View File

@ -12,4 +12,8 @@ FactoryGirl.define do
factory :admin, parent: :user do
role 'admin'
end
# Variant of the :user factory with role set to 'system'.
factory :system_user, parent: :user do
role 'system'
end
end

View File

@ -0,0 +1,40 @@
require 'spec_helper'
# Specs for RestartNodesJob.perform.
describe RestartNodesJob do
  # Smoke test: running the job with no data must not raise.
  it 'ensures that not raises error' do
    expect { RestartNodesJob.perform }.to_not raise_exception
  end

  # With the global lock set, the job must return before reading build nodes.
  it 'ensures that do nothing when all instructions disabled' do
    NodeInstruction.lock_all
    expect(RpmBuildNode).to_not receive(:all)
    RestartNodesJob.perform
  end

  # Only READY instructions without a build node owned by their system user
  # move to RESTARTING.
  it 'ensures that creates tasks' do
    allow_any_instance_of(NodeInstruction).to receive(:perform_restart)

    # ABF active node
    active_instruction = FactoryGirl.create(:node_instruction)
    FactoryGirl.create(:rpm_build_node, user_id: active_instruction.user_id)

    # User node
    FactoryGirl.create(:rpm_build_node)

    FactoryGirl.create(:node_instruction, status: NodeInstruction::DISABLED)
    already_restarting = FactoryGirl.create(:node_instruction, status: NodeInstruction::RESTARTING)
    FactoryGirl.create(:node_instruction, status: NodeInstruction::FAILED)
    orphan_instruction = FactoryGirl.create(:node_instruction)

    RestartNodesJob.perform

    restarting = NodeInstruction.where(status: NodeInstruction::RESTARTING)
    restarting.should have(2).items
    restarting.should include(already_restarting, orphan_instruction)
    restarting.should_not include(active_instruction)
  end
end

View File

@ -0,0 +1,9 @@
require 'spec_helper'
# Model spec for NodeInstruction.
describe NodeInstruction do
  # A record built by the default factory must pass validation.
  it 'is valid given valid attributes' do
    instruction = FactoryGirl.build(:node_instruction)
    expect(instruction).to be_valid
  end
end